files.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522
  1. import logging
  2. import os
  3. import uuid
  4. from pathlib import Path
  5. from typing import Optional
  6. from urllib.parse import quote
  7. from fastapi import (
  8. APIRouter,
  9. Depends,
  10. File,
  11. HTTPException,
  12. Request,
  13. UploadFile,
  14. status,
  15. Query,
  16. )
  17. from fastapi.responses import FileResponse, StreamingResponse
  18. from open_webui.constants import ERROR_MESSAGES
  19. from open_webui.env import SRC_LOG_LEVELS
  20. from open_webui.models.files import (
  21. FileForm,
  22. FileModel,
  23. FileModelResponse,
  24. Files,
  25. )
  26. from open_webui.models.knowledge import Knowledges
  27. from open_webui.routers.knowledge import get_knowledge, get_knowledge_list
  28. from open_webui.routers.retrieval import ProcessFileForm, process_file
  29. from open_webui.routers.audio import transcribe
  30. from open_webui.storage.provider import Storage
  31. from open_webui.utils.auth import get_admin_user, get_verified_user
  32. from pydantic import BaseModel
# Module-level logger; its level is read from the "MODELS" entry of SRC_LOG_LEVELS.
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["MODELS"])

# Router on which the file endpoints defined below are registered.
router = APIRouter()
  36. ############################
  37. # Check if the current user has access to a file through any knowledge bases the user may be in.
  38. ############################
  39. def has_access_to_file(
  40. file_id: Optional[str], access_type: str, user=Depends(get_verified_user)
  41. ) -> bool:
  42. file = Files.get_file_by_id(file_id)
  43. log.debug(f"Checking if user has {access_type} access to file")
  44. if not file:
  45. raise HTTPException(
  46. status_code=status.HTTP_404_NOT_FOUND,
  47. detail=ERROR_MESSAGES.NOT_FOUND,
  48. )
  49. has_access = False
  50. knowledge_base_id = file.meta.get("collection_name") if file.meta else None
  51. if knowledge_base_id:
  52. knowledge_bases = Knowledges.get_knowledge_bases_by_user_id(
  53. user.id, access_type
  54. )
  55. for knowledge_base in knowledge_bases:
  56. if knowledge_base.id == knowledge_base_id:
  57. has_access = True
  58. break
  59. return has_access
  60. ############################
  61. # Upload File
  62. ############################
  63. @router.post("/", response_model=FileModelResponse)
  64. def upload_file(
  65. request: Request,
  66. file: UploadFile = File(...),
  67. user=Depends(get_verified_user),
  68. file_metadata: dict = {},
  69. process: bool = Query(True),
  70. ):
  71. log.info(f"file.content_type: {file.content_type}")
  72. try:
  73. unsanitized_filename = file.filename
  74. filename = os.path.basename(unsanitized_filename)
  75. # replace filename with uuid
  76. id = str(uuid.uuid4())
  77. name = filename
  78. filename = f"{id}_{filename}"
  79. contents, file_path = Storage.upload_file(file.file, filename)
  80. file_item = Files.insert_new_file(
  81. user.id,
  82. FileForm(
  83. **{
  84. "id": id,
  85. "filename": name,
  86. "path": file_path,
  87. "meta": {
  88. "name": name,
  89. "content_type": file.content_type,
  90. "size": len(contents),
  91. "data": file_metadata,
  92. },
  93. }
  94. ),
  95. )
  96. if process:
  97. try:
  98. if file.content_type in [
  99. "audio/mpeg",
  100. "audio/wav",
  101. "audio/ogg",
  102. "audio/x-m4a",
  103. ]:
  104. file_path = Storage.get_file(file_path)
  105. result = transcribe(request, file_path)
  106. process_file(
  107. request,
  108. ProcessFileForm(file_id=id, content=result.get("text", "")),
  109. user=user,
  110. )
  111. elif file.content_type not in ["image/png", "image/jpeg", "image/gif"]:
  112. process_file(request, ProcessFileForm(file_id=id), user=user)
  113. file_item = Files.get_file_by_id(id=id)
  114. except Exception as e:
  115. log.exception(e)
  116. log.error(f"Error processing file: {file_item.id}")
  117. file_item = FileModelResponse(
  118. **{
  119. **file_item.model_dump(),
  120. "error": str(e.detail) if hasattr(e, "detail") else str(e),
  121. }
  122. )
  123. if file_item:
  124. return file_item
  125. else:
  126. raise HTTPException(
  127. status_code=status.HTTP_400_BAD_REQUEST,
  128. detail=ERROR_MESSAGES.DEFAULT("Error uploading file"),
  129. )
  130. except Exception as e:
  131. log.exception(e)
  132. raise HTTPException(
  133. status_code=status.HTTP_400_BAD_REQUEST,
  134. detail=ERROR_MESSAGES.DEFAULT(e),
  135. )
  136. ############################
  137. # List Files
  138. ############################
  139. @router.get("/", response_model=list[FileModelResponse])
  140. async def list_files(user=Depends(get_verified_user), content: bool = Query(True)):
  141. if user.role == "admin":
  142. files = Files.get_files()
  143. else:
  144. files = Files.get_files_by_user_id(user.id)
  145. if not content:
  146. for file in files:
  147. del file.data["content"]
  148. return files
  149. ############################
  150. # Delete All Files
  151. ############################
  152. @router.delete("/all")
  153. async def delete_all_files(user=Depends(get_admin_user)):
  154. result = Files.delete_all_files()
  155. if result:
  156. try:
  157. Storage.delete_all_files()
  158. except Exception as e:
  159. log.exception(e)
  160. log.error("Error deleting files")
  161. raise HTTPException(
  162. status_code=status.HTTP_400_BAD_REQUEST,
  163. detail=ERROR_MESSAGES.DEFAULT("Error deleting files"),
  164. )
  165. return {"message": "All files deleted successfully"}
  166. else:
  167. raise HTTPException(
  168. status_code=status.HTTP_400_BAD_REQUEST,
  169. detail=ERROR_MESSAGES.DEFAULT("Error deleting files"),
  170. )
  171. ############################
  172. # Get File By Id
  173. ############################
  174. @router.get("/{id}", response_model=Optional[FileModel])
  175. async def get_file_by_id(id: str, user=Depends(get_verified_user)):
  176. file = Files.get_file_by_id(id)
  177. if not file:
  178. raise HTTPException(
  179. status_code=status.HTTP_404_NOT_FOUND,
  180. detail=ERROR_MESSAGES.NOT_FOUND,
  181. )
  182. if (
  183. file.user_id == user.id
  184. or user.role == "admin"
  185. or has_access_to_file(id, "read", user)
  186. ):
  187. return file
  188. else:
  189. raise HTTPException(
  190. status_code=status.HTTP_404_NOT_FOUND,
  191. detail=ERROR_MESSAGES.NOT_FOUND,
  192. )
  193. ############################
  194. # Get File Data Content By Id
  195. ############################
  196. @router.get("/{id}/data/content")
  197. async def get_file_data_content_by_id(id: str, user=Depends(get_verified_user)):
  198. file = Files.get_file_by_id(id)
  199. if not file:
  200. raise HTTPException(
  201. status_code=status.HTTP_404_NOT_FOUND,
  202. detail=ERROR_MESSAGES.NOT_FOUND,
  203. )
  204. if (
  205. file.user_id == user.id
  206. or user.role == "admin"
  207. or has_access_to_file(id, "read", user)
  208. ):
  209. return {"content": file.data.get("content", "")}
  210. else:
  211. raise HTTPException(
  212. status_code=status.HTTP_404_NOT_FOUND,
  213. detail=ERROR_MESSAGES.NOT_FOUND,
  214. )
  215. ############################
  216. # Update File Data Content By Id
  217. ############################
class ContentForm(BaseModel):
    """Request body for the content-update endpoint."""

    # Replacement text; the update handler passes it to process_file as the
    # file's content.
    content: str
  220. @router.post("/{id}/data/content/update")
  221. async def update_file_data_content_by_id(
  222. request: Request, id: str, form_data: ContentForm, user=Depends(get_verified_user)
  223. ):
  224. file = Files.get_file_by_id(id)
  225. if not file:
  226. raise HTTPException(
  227. status_code=status.HTTP_404_NOT_FOUND,
  228. detail=ERROR_MESSAGES.NOT_FOUND,
  229. )
  230. if (
  231. file.user_id == user.id
  232. or user.role == "admin"
  233. or has_access_to_file(id, "write", user)
  234. ):
  235. try:
  236. process_file(
  237. request,
  238. ProcessFileForm(file_id=id, content=form_data.content),
  239. user=user,
  240. )
  241. file = Files.get_file_by_id(id=id)
  242. except Exception as e:
  243. log.exception(e)
  244. log.error(f"Error processing file: {file.id}")
  245. return {"content": file.data.get("content", "")}
  246. else:
  247. raise HTTPException(
  248. status_code=status.HTTP_404_NOT_FOUND,
  249. detail=ERROR_MESSAGES.NOT_FOUND,
  250. )
  251. ############################
  252. # Get File Content By Id
  253. ############################
  254. @router.get("/{id}/content")
  255. async def get_file_content_by_id(
  256. id: str, user=Depends(get_verified_user), attachment: bool = Query(False)
  257. ):
  258. file = Files.get_file_by_id(id)
  259. if not file:
  260. raise HTTPException(
  261. status_code=status.HTTP_404_NOT_FOUND,
  262. detail=ERROR_MESSAGES.NOT_FOUND,
  263. )
  264. if (
  265. file.user_id == user.id
  266. or user.role == "admin"
  267. or has_access_to_file(id, "read", user)
  268. ):
  269. try:
  270. file_path = Storage.get_file(file.path)
  271. file_path = Path(file_path)
  272. # Check if the file already exists in the cache
  273. if file_path.is_file():
  274. # Handle Unicode filenames
  275. filename = file.meta.get("name", file.filename)
  276. encoded_filename = quote(filename) # RFC5987 encoding
  277. content_type = file.meta.get("content_type")
  278. filename = file.meta.get("name", file.filename)
  279. encoded_filename = quote(filename)
  280. headers = {}
  281. if attachment:
  282. headers["Content-Disposition"] = (
  283. f"attachment; filename*=UTF-8''{encoded_filename}"
  284. )
  285. else:
  286. if content_type == "application/pdf" or filename.lower().endswith(
  287. ".pdf"
  288. ):
  289. headers["Content-Disposition"] = (
  290. f"inline; filename*=UTF-8''{encoded_filename}"
  291. )
  292. content_type = "application/pdf"
  293. elif content_type != "text/plain":
  294. headers["Content-Disposition"] = (
  295. f"attachment; filename*=UTF-8''{encoded_filename}"
  296. )
  297. return FileResponse(file_path, headers=headers, media_type=content_type)
  298. else:
  299. raise HTTPException(
  300. status_code=status.HTTP_404_NOT_FOUND,
  301. detail=ERROR_MESSAGES.NOT_FOUND,
  302. )
  303. except Exception as e:
  304. log.exception(e)
  305. log.error("Error getting file content")
  306. raise HTTPException(
  307. status_code=status.HTTP_400_BAD_REQUEST,
  308. detail=ERROR_MESSAGES.DEFAULT("Error getting file content"),
  309. )
  310. else:
  311. raise HTTPException(
  312. status_code=status.HTTP_404_NOT_FOUND,
  313. detail=ERROR_MESSAGES.NOT_FOUND,
  314. )
  315. @router.get("/{id}/content/html")
  316. async def get_html_file_content_by_id(id: str, user=Depends(get_verified_user)):
  317. file = Files.get_file_by_id(id)
  318. if not file:
  319. raise HTTPException(
  320. status_code=status.HTTP_404_NOT_FOUND,
  321. detail=ERROR_MESSAGES.NOT_FOUND,
  322. )
  323. if (
  324. file.user_id == user.id
  325. or user.role == "admin"
  326. or has_access_to_file(id, "read", user)
  327. ):
  328. try:
  329. file_path = Storage.get_file(file.path)
  330. file_path = Path(file_path)
  331. # Check if the file already exists in the cache
  332. if file_path.is_file():
  333. log.info(f"file_path: {file_path}")
  334. return FileResponse(file_path)
  335. else:
  336. raise HTTPException(
  337. status_code=status.HTTP_404_NOT_FOUND,
  338. detail=ERROR_MESSAGES.NOT_FOUND,
  339. )
  340. except Exception as e:
  341. log.exception(e)
  342. log.error("Error getting file content")
  343. raise HTTPException(
  344. status_code=status.HTTP_400_BAD_REQUEST,
  345. detail=ERROR_MESSAGES.DEFAULT("Error getting file content"),
  346. )
  347. else:
  348. raise HTTPException(
  349. status_code=status.HTTP_404_NOT_FOUND,
  350. detail=ERROR_MESSAGES.NOT_FOUND,
  351. )
  352. @router.get("/{id}/content/{file_name}")
  353. async def get_file_content_by_id(id: str, user=Depends(get_verified_user)):
  354. file = Files.get_file_by_id(id)
  355. if not file:
  356. raise HTTPException(
  357. status_code=status.HTTP_404_NOT_FOUND,
  358. detail=ERROR_MESSAGES.NOT_FOUND,
  359. )
  360. if (
  361. file.user_id == user.id
  362. or user.role == "admin"
  363. or has_access_to_file(id, "read", user)
  364. ):
  365. file_path = file.path
  366. # Handle Unicode filenames
  367. filename = file.meta.get("name", file.filename)
  368. encoded_filename = quote(filename) # RFC5987 encoding
  369. headers = {
  370. "Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}"
  371. }
  372. if file_path:
  373. file_path = Storage.get_file(file_path)
  374. file_path = Path(file_path)
  375. # Check if the file already exists in the cache
  376. if file_path.is_file():
  377. return FileResponse(file_path, headers=headers)
  378. else:
  379. raise HTTPException(
  380. status_code=status.HTTP_404_NOT_FOUND,
  381. detail=ERROR_MESSAGES.NOT_FOUND,
  382. )
  383. else:
  384. # File path doesn’t exist, return the content as .txt if possible
  385. file_content = file.content.get("content", "")
  386. file_name = file.filename
  387. # Create a generator that encodes the file content
  388. def generator():
  389. yield file_content.encode("utf-8")
  390. return StreamingResponse(
  391. generator(),
  392. media_type="text/plain",
  393. headers=headers,
  394. )
  395. else:
  396. raise HTTPException(
  397. status_code=status.HTTP_404_NOT_FOUND,
  398. detail=ERROR_MESSAGES.NOT_FOUND,
  399. )
  400. ############################
  401. # Delete File By Id
  402. ############################
  403. @router.delete("/{id}")
  404. async def delete_file_by_id(id: str, user=Depends(get_verified_user)):
  405. file = Files.get_file_by_id(id)
  406. if not file:
  407. raise HTTPException(
  408. status_code=status.HTTP_404_NOT_FOUND,
  409. detail=ERROR_MESSAGES.NOT_FOUND,
  410. )
  411. if (
  412. file.user_id == user.id
  413. or user.role == "admin"
  414. or has_access_to_file(id, "write", user)
  415. ):
  416. # We should add Chroma cleanup here
  417. result = Files.delete_file_by_id(id)
  418. if result:
  419. try:
  420. Storage.delete_file(file.path)
  421. except Exception as e:
  422. log.exception(e)
  423. log.error("Error deleting files")
  424. raise HTTPException(
  425. status_code=status.HTTP_400_BAD_REQUEST,
  426. detail=ERROR_MESSAGES.DEFAULT("Error deleting files"),
  427. )
  428. return {"message": "File deleted successfully"}
  429. else:
  430. raise HTTPException(
  431. status_code=status.HTTP_400_BAD_REQUEST,
  432. detail=ERROR_MESSAGES.DEFAULT("Error deleting file"),
  433. )
  434. else:
  435. raise HTTPException(
  436. status_code=status.HTTP_404_NOT_FOUND,
  437. detail=ERROR_MESSAGES.NOT_FOUND,
  438. )