files.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523
  1. import logging
  2. import os
  3. import uuid
  4. from pathlib import Path
  5. from typing import Optional
  6. from urllib.parse import quote
  7. from fastapi import (
  8. APIRouter,
  9. Depends,
  10. File,
  11. HTTPException,
  12. Request,
  13. UploadFile,
  14. status,
  15. Query,
  16. )
  17. from fastapi.responses import FileResponse, StreamingResponse
  18. from open_webui.constants import ERROR_MESSAGES
  19. from open_webui.env import SRC_LOG_LEVELS
  20. from open_webui.models.files import (
  21. FileForm,
  22. FileModel,
  23. FileModelResponse,
  24. Files,
  25. )
  26. from open_webui.models.knowledge import Knowledges
  27. from open_webui.routers.knowledge import get_knowledge, get_knowledge_list
  28. from open_webui.routers.retrieval import ProcessFileForm, process_file
  29. from open_webui.routers.audio import transcribe
  30. from open_webui.storage.provider import Storage
  31. from open_webui.utils.auth import get_admin_user, get_verified_user
  32. from pydantic import BaseModel
# Module-level logger; its level is read from the "MODELS" entry of SRC_LOG_LEVELS.
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["MODELS"])
# Router that every endpoint in this module is registered on.
router = APIRouter()
  36. ############################
  37. # Check if the current user has access to a file through any knowledge bases the user may be in.
  38. ############################
  39. def has_access_to_file(
  40. file_id: Optional[str], access_type: str, user=Depends(get_verified_user)
  41. ) -> bool:
  42. file = Files.get_file_by_id(file_id)
  43. log.debug(f"Checking if user has {access_type} access to file")
  44. if not file:
  45. raise HTTPException(
  46. status_code=status.HTTP_404_NOT_FOUND,
  47. detail=ERROR_MESSAGES.NOT_FOUND,
  48. )
  49. has_access = False
  50. knowledge_base_id = file.meta.get("collection_name") if file.meta else None
  51. if knowledge_base_id:
  52. knowledge_bases = Knowledges.get_knowledge_bases_by_user_id(
  53. user.id, access_type
  54. )
  55. for knowledge_base in knowledge_bases:
  56. if knowledge_base.id == knowledge_base_id:
  57. has_access = True
  58. break
  59. return has_access
  60. ############################
  61. # Upload File
  62. ############################
  63. @router.post("/", response_model=FileModelResponse)
  64. def upload_file(
  65. request: Request,
  66. file: UploadFile = File(...),
  67. user=Depends(get_verified_user),
  68. file_metadata: dict = {},
  69. process: bool = Query(True),
  70. ):
  71. log.info(f"file.content_type: {file.content_type}")
  72. try:
  73. unsanitized_filename = file.filename
  74. filename = os.path.basename(unsanitized_filename)
  75. # replace filename with uuid
  76. id = str(uuid.uuid4())
  77. name = filename
  78. filename = f"{id}_{filename}"
  79. contents, file_path = Storage.upload_file(file.file, filename)
  80. file_item = Files.insert_new_file(
  81. user.id,
  82. FileForm(
  83. **{
  84. "id": id,
  85. "filename": name,
  86. "path": file_path,
  87. "meta": {
  88. "name": name,
  89. "content_type": file.content_type,
  90. "size": len(contents),
  91. "data": file_metadata,
  92. },
  93. }
  94. ),
  95. )
  96. if process:
  97. try:
  98. if file.content_type in [
  99. "audio/mpeg",
  100. "audio/wav",
  101. "audio/ogg",
  102. "audio/x-m4a",
  103. ]:
  104. file_path = Storage.get_file(file_path)
  105. result = transcribe(request, file_path)
  106. process_file(
  107. request,
  108. ProcessFileForm(file_id=id, content=result.get("text", "")),
  109. user=user,
  110. )
  111. elif file.content_type not in ["image/png", "image/jpeg", "image/gif"]:
  112. process_file(request, ProcessFileForm(file_id=id), user=user)
  113. file_item = Files.get_file_by_id(id=id)
  114. except Exception as e:
  115. log.exception(e)
  116. log.error(f"Error processing file: {file_item.id}")
  117. file_item = FileModelResponse(
  118. **{
  119. **file_item.model_dump(),
  120. "error": str(e.detail) if hasattr(e, "detail") else str(e),
  121. }
  122. )
  123. if file_item:
  124. return file_item
  125. else:
  126. raise HTTPException(
  127. status_code=status.HTTP_400_BAD_REQUEST,
  128. detail=ERROR_MESSAGES.DEFAULT("Error uploading file"),
  129. )
  130. except Exception as e:
  131. log.exception(e)
  132. raise HTTPException(
  133. status_code=status.HTTP_400_BAD_REQUEST,
  134. detail=ERROR_MESSAGES.DEFAULT(e),
  135. )
  136. ############################
  137. # List Files
  138. ############################
  139. @router.get("/", response_model=list[FileModelResponse])
  140. async def list_files(
  141. user=Depends(get_verified_user), include_content: bool = Query(True)
  142. ):
  143. if user.role == "admin":
  144. files = Files.get_files()
  145. else:
  146. files = Files.get_files_by_user_id(user.id)
  147. if not include_content:
  148. for file in files:
  149. file.data["content"] = ""
  150. return files
  151. ############################
  152. # Delete All Files
  153. ############################
  154. @router.delete("/all")
  155. async def delete_all_files(user=Depends(get_admin_user)):
  156. result = Files.delete_all_files()
  157. if result:
  158. try:
  159. Storage.delete_all_files()
  160. except Exception as e:
  161. log.exception(e)
  162. log.error("Error deleting files")
  163. raise HTTPException(
  164. status_code=status.HTTP_400_BAD_REQUEST,
  165. detail=ERROR_MESSAGES.DEFAULT("Error deleting files"),
  166. )
  167. return {"message": "All files deleted successfully"}
  168. else:
  169. raise HTTPException(
  170. status_code=status.HTTP_400_BAD_REQUEST,
  171. detail=ERROR_MESSAGES.DEFAULT("Error deleting files"),
  172. )
  173. ############################
  174. # Get File By Id
  175. ############################
  176. @router.get("/{id}", response_model=Optional[FileModel])
  177. async def get_file_by_id(id: str, user=Depends(get_verified_user)):
  178. file = Files.get_file_by_id(id)
  179. if not file:
  180. raise HTTPException(
  181. status_code=status.HTTP_404_NOT_FOUND,
  182. detail=ERROR_MESSAGES.NOT_FOUND,
  183. )
  184. if (
  185. file.user_id == user.id
  186. or user.role == "admin"
  187. or has_access_to_file(id, "read", user)
  188. ):
  189. return file
  190. else:
  191. raise HTTPException(
  192. status_code=status.HTTP_404_NOT_FOUND,
  193. detail=ERROR_MESSAGES.NOT_FOUND,
  194. )
  195. ############################
  196. # Get File Data Content By Id
  197. ############################
  198. @router.get("/{id}/data/content")
  199. async def get_file_data_content_by_id(id: str, user=Depends(get_verified_user)):
  200. file = Files.get_file_by_id(id)
  201. if not file:
  202. raise HTTPException(
  203. status_code=status.HTTP_404_NOT_FOUND,
  204. detail=ERROR_MESSAGES.NOT_FOUND,
  205. )
  206. if (
  207. file.user_id == user.id
  208. or user.role == "admin"
  209. or has_access_to_file(id, "read", user)
  210. ):
  211. return {"content": file.data.get("content", "")}
  212. else:
  213. raise HTTPException(
  214. status_code=status.HTTP_404_NOT_FOUND,
  215. detail=ERROR_MESSAGES.NOT_FOUND,
  216. )
  217. ############################
  218. # Update File Data Content By Id
  219. ############################
class ContentForm(BaseModel):
    """Request body carrying the replacement text content for a file."""

    # New text content for the file.
    content: str
  222. @router.post("/{id}/data/content/update")
  223. async def update_file_data_content_by_id(
  224. request: Request, id: str, form_data: ContentForm, user=Depends(get_verified_user)
  225. ):
  226. file = Files.get_file_by_id(id)
  227. if not file:
  228. raise HTTPException(
  229. status_code=status.HTTP_404_NOT_FOUND,
  230. detail=ERROR_MESSAGES.NOT_FOUND,
  231. )
  232. if (
  233. file.user_id == user.id
  234. or user.role == "admin"
  235. or has_access_to_file(id, "write", user)
  236. ):
  237. try:
  238. process_file(
  239. request,
  240. ProcessFileForm(file_id=id, content=form_data.content),
  241. user=user,
  242. )
  243. file = Files.get_file_by_id(id=id)
  244. except Exception as e:
  245. log.exception(e)
  246. log.error(f"Error processing file: {file.id}")
  247. return {"content": file.data.get("content", "")}
  248. else:
  249. raise HTTPException(
  250. status_code=status.HTTP_404_NOT_FOUND,
  251. detail=ERROR_MESSAGES.NOT_FOUND,
  252. )
  253. ############################
  254. # Get File Content By Id
  255. ############################
  256. @router.get("/{id}/content")
  257. async def get_file_content_by_id(
  258. id: str, user=Depends(get_verified_user), attachment: bool = Query(False)
  259. ):
  260. file = Files.get_file_by_id(id)
  261. if not file:
  262. raise HTTPException(
  263. status_code=status.HTTP_404_NOT_FOUND,
  264. detail=ERROR_MESSAGES.NOT_FOUND,
  265. )
  266. if (
  267. file.user_id == user.id
  268. or user.role == "admin"
  269. or has_access_to_file(id, "read", user)
  270. ):
  271. try:
  272. file_path = Storage.get_file(file.path)
  273. file_path = Path(file_path)
  274. # Check if the file already exists in the cache
  275. if file_path.is_file():
  276. # Handle Unicode filenames
  277. filename = file.meta.get("name", file.filename)
  278. encoded_filename = quote(filename) # RFC5987 encoding
  279. content_type = file.meta.get("content_type")
  280. filename = file.meta.get("name", file.filename)
  281. encoded_filename = quote(filename)
  282. headers = {}
  283. if attachment:
  284. headers["Content-Disposition"] = (
  285. f"attachment; filename*=UTF-8''{encoded_filename}"
  286. )
  287. else:
  288. if content_type == "application/pdf" or filename.lower().endswith(
  289. ".pdf"
  290. ):
  291. headers["Content-Disposition"] = (
  292. f"inline; filename*=UTF-8''{encoded_filename}"
  293. )
  294. content_type = "application/pdf"
  295. elif content_type != "text/plain":
  296. headers["Content-Disposition"] = (
  297. f"attachment; filename*=UTF-8''{encoded_filename}"
  298. )
  299. return FileResponse(file_path, headers=headers, media_type=content_type)
  300. else:
  301. raise HTTPException(
  302. status_code=status.HTTP_404_NOT_FOUND,
  303. detail=ERROR_MESSAGES.NOT_FOUND,
  304. )
  305. except Exception as e:
  306. log.exception(e)
  307. log.error("Error getting file content")
  308. raise HTTPException(
  309. status_code=status.HTTP_400_BAD_REQUEST,
  310. detail=ERROR_MESSAGES.DEFAULT("Error getting file content"),
  311. )
  312. else:
  313. raise HTTPException(
  314. status_code=status.HTTP_404_NOT_FOUND,
  315. detail=ERROR_MESSAGES.NOT_FOUND,
  316. )
  317. @router.get("/{id}/content/html")
  318. async def get_html_file_content_by_id(id: str, user=Depends(get_verified_user)):
  319. file = Files.get_file_by_id(id)
  320. if not file:
  321. raise HTTPException(
  322. status_code=status.HTTP_404_NOT_FOUND,
  323. detail=ERROR_MESSAGES.NOT_FOUND,
  324. )
  325. if (
  326. file.user_id == user.id
  327. or user.role == "admin"
  328. or has_access_to_file(id, "read", user)
  329. ):
  330. try:
  331. file_path = Storage.get_file(file.path)
  332. file_path = Path(file_path)
  333. # Check if the file already exists in the cache
  334. if file_path.is_file():
  335. log.info(f"file_path: {file_path}")
  336. return FileResponse(file_path)
  337. else:
  338. raise HTTPException(
  339. status_code=status.HTTP_404_NOT_FOUND,
  340. detail=ERROR_MESSAGES.NOT_FOUND,
  341. )
  342. except Exception as e:
  343. log.exception(e)
  344. log.error("Error getting file content")
  345. raise HTTPException(
  346. status_code=status.HTTP_400_BAD_REQUEST,
  347. detail=ERROR_MESSAGES.DEFAULT("Error getting file content"),
  348. )
  349. else:
  350. raise HTTPException(
  351. status_code=status.HTTP_404_NOT_FOUND,
  352. detail=ERROR_MESSAGES.NOT_FOUND,
  353. )
  354. @router.get("/{id}/content/{file_name}")
  355. async def get_file_content_by_id(id: str, user=Depends(get_verified_user)):
  356. file = Files.get_file_by_id(id)
  357. if not file:
  358. raise HTTPException(
  359. status_code=status.HTTP_404_NOT_FOUND,
  360. detail=ERROR_MESSAGES.NOT_FOUND,
  361. )
  362. if (
  363. file.user_id == user.id
  364. or user.role == "admin"
  365. or has_access_to_file(id, "read", user)
  366. ):
  367. file_path = file.path
  368. # Handle Unicode filenames
  369. filename = file.meta.get("name", file.filename)
  370. encoded_filename = quote(filename) # RFC5987 encoding
  371. headers = {
  372. "Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}"
  373. }
  374. if file_path:
  375. file_path = Storage.get_file(file_path)
  376. file_path = Path(file_path)
  377. # Check if the file already exists in the cache
  378. if file_path.is_file():
  379. return FileResponse(file_path, headers=headers)
  380. else:
  381. raise HTTPException(
  382. status_code=status.HTTP_404_NOT_FOUND,
  383. detail=ERROR_MESSAGES.NOT_FOUND,
  384. )
  385. else:
  386. # File path doesn’t exist, return the content as .txt if possible
  387. file_content = file.content.get("content", "")
  388. file_name = file.filename
  389. # Create a generator that encodes the file content
  390. def generator():
  391. yield file_content.encode("utf-8")
  392. return StreamingResponse(
  393. generator(),
  394. media_type="text/plain",
  395. headers=headers,
  396. )
  397. else:
  398. raise HTTPException(
  399. status_code=status.HTTP_404_NOT_FOUND,
  400. detail=ERROR_MESSAGES.NOT_FOUND,
  401. )
  402. ############################
  403. # Delete File By Id
  404. ############################
  405. @router.delete("/{id}")
  406. async def delete_file_by_id(id: str, user=Depends(get_verified_user)):
  407. file = Files.get_file_by_id(id)
  408. if not file:
  409. raise HTTPException(
  410. status_code=status.HTTP_404_NOT_FOUND,
  411. detail=ERROR_MESSAGES.NOT_FOUND,
  412. )
  413. if (
  414. file.user_id == user.id
  415. or user.role == "admin"
  416. or has_access_to_file(id, "write", user)
  417. ):
  418. # We should add Chroma cleanup here
  419. result = Files.delete_file_by_id(id)
  420. if result:
  421. try:
  422. Storage.delete_file(file.path)
  423. except Exception as e:
  424. log.exception(e)
  425. log.error("Error deleting files")
  426. raise HTTPException(
  427. status_code=status.HTTP_400_BAD_REQUEST,
  428. detail=ERROR_MESSAGES.DEFAULT("Error deleting files"),
  429. )
  430. return {"message": "File deleted successfully"}
  431. else:
  432. raise HTTPException(
  433. status_code=status.HTTP_400_BAD_REQUEST,
  434. detail=ERROR_MESSAGES.DEFAULT("Error deleting file"),
  435. )
  436. else:
  437. raise HTTPException(
  438. status_code=status.HTTP_404_NOT_FOUND,
  439. detail=ERROR_MESSAGES.NOT_FOUND,
  440. )