files.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517
  1. import logging
  2. import os
  3. import uuid
  4. from pathlib import Path
  5. from typing import Optional
  6. from urllib.parse import quote
  7. from fastapi import (
  8. APIRouter,
  9. Depends,
  10. File,
  11. HTTPException,
  12. Request,
  13. UploadFile,
  14. status,
  15. Query,
  16. )
  17. from fastapi.responses import FileResponse, StreamingResponse
  18. from open_webui.constants import ERROR_MESSAGES
  19. from open_webui.env import SRC_LOG_LEVELS
  20. from open_webui.models.files import (
  21. FileForm,
  22. FileModel,
  23. FileModelResponse,
  24. Files,
  25. )
  26. from open_webui.models.knowledge import Knowledges
  27. from open_webui.routers.knowledge import get_knowledge, get_knowledge_list
  28. from open_webui.routers.retrieval import ProcessFileForm, process_file
  29. from open_webui.routers.audio import transcribe
  30. from open_webui.storage.provider import Storage
  31. from open_webui.utils.auth import get_admin_user, get_verified_user
  32. from pydantic import BaseModel
  33. log = logging.getLogger(__name__)
  34. log.setLevel(SRC_LOG_LEVELS["MODELS"])
  35. router = APIRouter()
  36. ############################
  37. # Check if the current user has access to a file through any knowledge bases the user may be in.
  38. ############################
  39. def has_access_to_file(
  40. file_id: Optional[str], access_type: str, user=Depends(get_verified_user)
  41. ) -> bool:
  42. file = Files.get_file_by_id(file_id)
  43. log.debug(f"Checking if user has {access_type} access to file")
  44. if not file:
  45. raise HTTPException(
  46. status_code=status.HTTP_404_NOT_FOUND,
  47. detail=ERROR_MESSAGES.NOT_FOUND,
  48. )
  49. has_access = False
  50. knowledge_base_id = file.meta.get("collection_name") if file.meta else None
  51. if knowledge_base_id:
  52. knowledge_bases = Knowledges.get_knowledge_bases_by_user_id(
  53. user.id, access_type
  54. )
  55. for knowledge_base in knowledge_bases:
  56. if knowledge_base.id == knowledge_base_id:
  57. has_access = True
  58. break
  59. return has_access
  60. ############################
  61. # Upload File
  62. ############################
  63. @router.post("/", response_model=FileModelResponse)
  64. def upload_file(
  65. request: Request,
  66. file: UploadFile = File(...),
  67. user=Depends(get_verified_user),
  68. file_metadata: dict = {},
  69. process: bool = Query(True),
  70. ):
  71. log.info(f"file.content_type: {file.content_type}")
  72. try:
  73. unsanitized_filename = file.filename
  74. filename = os.path.basename(unsanitized_filename)
  75. # replace filename with uuid
  76. id = str(uuid.uuid4())
  77. name = filename
  78. filename = f"{id}_{filename}"
  79. contents, file_path = Storage.upload_file(file.file, filename)
  80. file_item = Files.insert_new_file(
  81. user.id,
  82. FileForm(
  83. **{
  84. "id": id,
  85. "filename": name,
  86. "path": file_path,
  87. "meta": {
  88. "name": name,
  89. "content_type": file.content_type,
  90. "size": len(contents),
  91. "data": file_metadata,
  92. },
  93. }
  94. ),
  95. )
  96. if process:
  97. try:
  98. if file.content_type in [
  99. "audio/mpeg",
  100. "audio/wav",
  101. "audio/ogg",
  102. "audio/x-m4a",
  103. ]:
  104. file_path = Storage.get_file(file_path)
  105. result = transcribe(request, file_path)
  106. process_file(
  107. request,
  108. ProcessFileForm(file_id=id, content=result.get("text", "")),
  109. user=user,
  110. )
  111. elif file.content_type not in ["image/png", "image/jpeg", "image/gif"]:
  112. process_file(request, ProcessFileForm(file_id=id), user=user)
  113. file_item = Files.get_file_by_id(id=id)
  114. except Exception as e:
  115. log.exception(e)
  116. log.error(f"Error processing file: {file_item.id}")
  117. file_item = FileModelResponse(
  118. **{
  119. **file_item.model_dump(),
  120. "error": str(e.detail) if hasattr(e, "detail") else str(e),
  121. }
  122. )
  123. if file_item:
  124. return file_item
  125. else:
  126. raise HTTPException(
  127. status_code=status.HTTP_400_BAD_REQUEST,
  128. detail=ERROR_MESSAGES.DEFAULT("Error uploading file"),
  129. )
  130. except Exception as e:
  131. log.exception(e)
  132. raise HTTPException(
  133. status_code=status.HTTP_400_BAD_REQUEST,
  134. detail=ERROR_MESSAGES.DEFAULT(e),
  135. )
  136. ############################
  137. # List Files
  138. ############################
  139. @router.get("/", response_model=list[FileModelResponse])
  140. async def list_files(user=Depends(get_verified_user)):
  141. if user.role == "admin":
  142. files = Files.get_files()
  143. else:
  144. files = Files.get_files_by_user_id(user.id)
  145. return files
  146. ############################
  147. # Delete All Files
  148. ############################
  149. @router.delete("/all")
  150. async def delete_all_files(user=Depends(get_admin_user)):
  151. result = Files.delete_all_files()
  152. if result:
  153. try:
  154. Storage.delete_all_files()
  155. except Exception as e:
  156. log.exception(e)
  157. log.error("Error deleting files")
  158. raise HTTPException(
  159. status_code=status.HTTP_400_BAD_REQUEST,
  160. detail=ERROR_MESSAGES.DEFAULT("Error deleting files"),
  161. )
  162. return {"message": "All files deleted successfully"}
  163. else:
  164. raise HTTPException(
  165. status_code=status.HTTP_400_BAD_REQUEST,
  166. detail=ERROR_MESSAGES.DEFAULT("Error deleting files"),
  167. )
  168. ############################
  169. # Get File By Id
  170. ############################
  171. @router.get("/{id}", response_model=Optional[FileModel])
  172. async def get_file_by_id(id: str, user=Depends(get_verified_user)):
  173. file = Files.get_file_by_id(id)
  174. if not file:
  175. raise HTTPException(
  176. status_code=status.HTTP_404_NOT_FOUND,
  177. detail=ERROR_MESSAGES.NOT_FOUND,
  178. )
  179. if (
  180. file.user_id == user.id
  181. or user.role == "admin"
  182. or has_access_to_file(id, "read", user)
  183. ):
  184. return file
  185. else:
  186. raise HTTPException(
  187. status_code=status.HTTP_404_NOT_FOUND,
  188. detail=ERROR_MESSAGES.NOT_FOUND,
  189. )
  190. ############################
  191. # Get File Data Content By Id
  192. ############################
  193. @router.get("/{id}/data/content")
  194. async def get_file_data_content_by_id(id: str, user=Depends(get_verified_user)):
  195. file = Files.get_file_by_id(id)
  196. if not file:
  197. raise HTTPException(
  198. status_code=status.HTTP_404_NOT_FOUND,
  199. detail=ERROR_MESSAGES.NOT_FOUND,
  200. )
  201. if (
  202. file.user_id == user.id
  203. or user.role == "admin"
  204. or has_access_to_file(id, "read", user)
  205. ):
  206. return {"content": file.data.get("content", "")}
  207. else:
  208. raise HTTPException(
  209. status_code=status.HTTP_404_NOT_FOUND,
  210. detail=ERROR_MESSAGES.NOT_FOUND,
  211. )
  212. ############################
  213. # Update File Data Content By Id
  214. ############################
  215. class ContentForm(BaseModel):
  216. content: str
  217. @router.post("/{id}/data/content/update")
  218. async def update_file_data_content_by_id(
  219. request: Request, id: str, form_data: ContentForm, user=Depends(get_verified_user)
  220. ):
  221. file = Files.get_file_by_id(id)
  222. if not file:
  223. raise HTTPException(
  224. status_code=status.HTTP_404_NOT_FOUND,
  225. detail=ERROR_MESSAGES.NOT_FOUND,
  226. )
  227. if (
  228. file.user_id == user.id
  229. or user.role == "admin"
  230. or has_access_to_file(id, "write", user)
  231. ):
  232. try:
  233. process_file(
  234. request,
  235. ProcessFileForm(file_id=id, content=form_data.content),
  236. user=user,
  237. )
  238. file = Files.get_file_by_id(id=id)
  239. except Exception as e:
  240. log.exception(e)
  241. log.error(f"Error processing file: {file.id}")
  242. return {"content": file.data.get("content", "")}
  243. else:
  244. raise HTTPException(
  245. status_code=status.HTTP_404_NOT_FOUND,
  246. detail=ERROR_MESSAGES.NOT_FOUND,
  247. )
  248. ############################
  249. # Get File Content By Id
  250. ############################
  251. @router.get("/{id}/content")
  252. async def get_file_content_by_id(
  253. id: str, user=Depends(get_verified_user), attachment: bool = Query(False)
  254. ):
  255. file = Files.get_file_by_id(id)
  256. if not file:
  257. raise HTTPException(
  258. status_code=status.HTTP_404_NOT_FOUND,
  259. detail=ERROR_MESSAGES.NOT_FOUND,
  260. )
  261. if (
  262. file.user_id == user.id
  263. or user.role == "admin"
  264. or has_access_to_file(id, "read", user)
  265. ):
  266. try:
  267. file_path = Storage.get_file(file.path)
  268. file_path = Path(file_path)
  269. # Check if the file already exists in the cache
  270. if file_path.is_file():
  271. # Handle Unicode filenames
  272. filename = file.meta.get("name", file.filename)
  273. encoded_filename = quote(filename) # RFC5987 encoding
  274. content_type = file.meta.get("content_type")
  275. filename = file.meta.get("name", file.filename)
  276. encoded_filename = quote(filename)
  277. headers = {}
  278. if attachment:
  279. headers["Content-Disposition"] = (
  280. f"attachment; filename*=UTF-8''{encoded_filename}"
  281. )
  282. else:
  283. if content_type == "application/pdf" or filename.lower().endswith(
  284. ".pdf"
  285. ):
  286. headers["Content-Disposition"] = (
  287. f"inline; filename*=UTF-8''{encoded_filename}"
  288. )
  289. content_type = "application/pdf"
  290. elif content_type != "text/plain":
  291. headers["Content-Disposition"] = (
  292. f"attachment; filename*=UTF-8''{encoded_filename}"
  293. )
  294. return FileResponse(file_path, headers=headers, media_type=content_type)
  295. else:
  296. raise HTTPException(
  297. status_code=status.HTTP_404_NOT_FOUND,
  298. detail=ERROR_MESSAGES.NOT_FOUND,
  299. )
  300. except Exception as e:
  301. log.exception(e)
  302. log.error("Error getting file content")
  303. raise HTTPException(
  304. status_code=status.HTTP_400_BAD_REQUEST,
  305. detail=ERROR_MESSAGES.DEFAULT("Error getting file content"),
  306. )
  307. else:
  308. raise HTTPException(
  309. status_code=status.HTTP_404_NOT_FOUND,
  310. detail=ERROR_MESSAGES.NOT_FOUND,
  311. )
  312. @router.get("/{id}/content/html")
  313. async def get_html_file_content_by_id(id: str, user=Depends(get_verified_user)):
  314. file = Files.get_file_by_id(id)
  315. if not file:
  316. raise HTTPException(
  317. status_code=status.HTTP_404_NOT_FOUND,
  318. detail=ERROR_MESSAGES.NOT_FOUND,
  319. )
  320. if (
  321. file.user_id == user.id
  322. or user.role == "admin"
  323. or has_access_to_file(id, "read", user)
  324. ):
  325. try:
  326. file_path = Storage.get_file(file.path)
  327. file_path = Path(file_path)
  328. # Check if the file already exists in the cache
  329. if file_path.is_file():
  330. log.info(f"file_path: {file_path}")
  331. return FileResponse(file_path)
  332. else:
  333. raise HTTPException(
  334. status_code=status.HTTP_404_NOT_FOUND,
  335. detail=ERROR_MESSAGES.NOT_FOUND,
  336. )
  337. except Exception as e:
  338. log.exception(e)
  339. log.error("Error getting file content")
  340. raise HTTPException(
  341. status_code=status.HTTP_400_BAD_REQUEST,
  342. detail=ERROR_MESSAGES.DEFAULT("Error getting file content"),
  343. )
  344. else:
  345. raise HTTPException(
  346. status_code=status.HTTP_404_NOT_FOUND,
  347. detail=ERROR_MESSAGES.NOT_FOUND,
  348. )
  349. @router.get("/{id}/content/{file_name}")
  350. async def get_file_content_by_id(id: str, user=Depends(get_verified_user)):
  351. file = Files.get_file_by_id(id)
  352. if not file:
  353. raise HTTPException(
  354. status_code=status.HTTP_404_NOT_FOUND,
  355. detail=ERROR_MESSAGES.NOT_FOUND,
  356. )
  357. if (
  358. file.user_id == user.id
  359. or user.role == "admin"
  360. or has_access_to_file(id, "read", user)
  361. ):
  362. file_path = file.path
  363. # Handle Unicode filenames
  364. filename = file.meta.get("name", file.filename)
  365. encoded_filename = quote(filename) # RFC5987 encoding
  366. headers = {
  367. "Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}"
  368. }
  369. if file_path:
  370. file_path = Storage.get_file(file_path)
  371. file_path = Path(file_path)
  372. # Check if the file already exists in the cache
  373. if file_path.is_file():
  374. return FileResponse(file_path, headers=headers)
  375. else:
  376. raise HTTPException(
  377. status_code=status.HTTP_404_NOT_FOUND,
  378. detail=ERROR_MESSAGES.NOT_FOUND,
  379. )
  380. else:
  381. # File path doesn’t exist, return the content as .txt if possible
  382. file_content = file.content.get("content", "")
  383. file_name = file.filename
  384. # Create a generator that encodes the file content
  385. def generator():
  386. yield file_content.encode("utf-8")
  387. return StreamingResponse(
  388. generator(),
  389. media_type="text/plain",
  390. headers=headers,
  391. )
  392. else:
  393. raise HTTPException(
  394. status_code=status.HTTP_404_NOT_FOUND,
  395. detail=ERROR_MESSAGES.NOT_FOUND,
  396. )
  397. ############################
  398. # Delete File By Id
  399. ############################
  400. @router.delete("/{id}")
  401. async def delete_file_by_id(id: str, user=Depends(get_verified_user)):
  402. file = Files.get_file_by_id(id)
  403. if not file:
  404. raise HTTPException(
  405. status_code=status.HTTP_404_NOT_FOUND,
  406. detail=ERROR_MESSAGES.NOT_FOUND,
  407. )
  408. if (
  409. file.user_id == user.id
  410. or user.role == "admin"
  411. or has_access_to_file(id, "write", user)
  412. ):
  413. # We should add Chroma cleanup here
  414. result = Files.delete_file_by_id(id)
  415. if result:
  416. try:
  417. Storage.delete_file(file.path)
  418. except Exception as e:
  419. log.exception(e)
  420. log.error("Error deleting files")
  421. raise HTTPException(
  422. status_code=status.HTTP_400_BAD_REQUEST,
  423. detail=ERROR_MESSAGES.DEFAULT("Error deleting files"),
  424. )
  425. return {"message": "File deleted successfully"}
  426. else:
  427. raise HTTPException(
  428. status_code=status.HTTP_400_BAD_REQUEST,
  429. detail=ERROR_MESSAGES.DEFAULT("Error deleting file"),
  430. )
  431. else:
  432. raise HTTPException(
  433. status_code=status.HTTP_404_NOT_FOUND,
  434. detail=ERROR_MESSAGES.NOT_FOUND,
  435. )