files.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408
  1. import logging
  2. import os
  3. import uuid
  4. from pathlib import Path
  5. from typing import Optional
  6. from urllib.parse import quote
  7. from fastapi import (
  8. APIRouter,
  9. Depends,
  10. File,
  11. HTTPException,
  12. Request,
  13. UploadFile,
  14. status,
  15. Query,
  16. )
  17. from fastapi.responses import FileResponse, StreamingResponse
  18. from open_webui.constants import ERROR_MESSAGES
  19. from open_webui.env import SRC_LOG_LEVELS
  20. from open_webui.models.files import (
  21. FileForm,
  22. FileModel,
  23. FileModelResponse,
  24. Files,
  25. )
  26. from open_webui.routers.retrieval import ProcessFileForm, process_file
  27. from open_webui.routers.audio import transcribe
  28. from open_webui.storage.provider import Storage
  29. from open_webui.utils.auth import get_admin_user, get_verified_user
  30. from pydantic import BaseModel
  # Module-level logger; its level is taken from the "MODELS" entry of SRC_LOG_LEVELS.
  log = logging.getLogger(__name__)
  log.setLevel(SRC_LOG_LEVELS["MODELS"])

  # Router on which every endpoint in this module is registered.
  router = APIRouter()
  34. ############################
  35. # Upload File
  36. ############################
  @router.post("/", response_model=FileModelResponse)
  def upload_file(
      request: Request,
      file: UploadFile = File(...),
      user=Depends(get_verified_user),
      file_metadata: dict = {},
      process: bool = Query(True),
  ):
      """Store an uploaded file, record it, and optionally process its content.

      Args:
          request: Incoming request, forwarded to ``transcribe`` / ``process_file``.
          file: The uploaded file.
          user: Verified user; ``user.id`` is stored as the owner of the record.
          file_metadata: Extra metadata saved under ``meta["data"]``. The default
              dict is only read here, never mutated.
          process: When true, audio uploads (mpeg/wav/ogg/x-m4a) are transcribed
              and the transcript is passed to ``process_file``; PNG/JPEG/GIF
              images are skipped; every other content type goes straight to
              ``process_file``.

      Returns:
          The file record. If processing raised, the record is still returned,
          with an additional ``error`` field describing the failure.

      Raises:
          HTTPException: 400 when storing or recording the upload fails, or when
              no file record is available to return.
      """
      log.info(f"file.content_type: {file.content_type}")
      try:
          # Keep only the final path component of the client-supplied name.
          name = os.path.basename(file.filename)

          # The stored object is named "<uuid>_<name>"; the record keeps the plain name.
          file_id = str(uuid.uuid4())
          contents, file_path = Storage.upload_file(file.file, f"{file_id}_{name}")

          file_item = Files.insert_new_file(
              user.id,
              FileForm(
                  **{
                      "id": file_id,
                      "filename": name,
                      "path": file_path,
                      "meta": {
                          "name": name,
                          "content_type": file.content_type,
                          "size": len(contents),
                          "data": file_metadata,
                      },
                  }
              ),
          )

          # Without a record there is nothing to attach results (or an error) to,
          # so processing is skipped and the failure is reported below.
          if file_item and process:
              try:
                  if file.content_type in [
                      "audio/mpeg",
                      "audio/wav",
                      "audio/ogg",
                      "audio/x-m4a",
                  ]:
                      file_path = Storage.get_file(file_path)
                      result = transcribe(request, file_path)
                      process_file(
                          request,
                          ProcessFileForm(file_id=file_id, content=result.get("text", "")),
                          user=user,
                      )
                  elif file.content_type not in ["image/png", "image/jpeg", "image/gif"]:
                      process_file(request, ProcessFileForm(file_id=file_id), user=user)

                  # Re-read the record after processing.
                  file_item = Files.get_file_by_id(id=file_id)
              except Exception as e:
                  # A processing failure does not fail the upload: the record is
                  # returned with the error message attached.
                  log.exception(e)
                  log.error(f"Error processing file: {file_item.id}")
                  file_item = FileModelResponse(
                      **{
                          **file_item.model_dump(),
                          "error": str(e.detail) if hasattr(e, "detail") else str(e),
                      }
                  )
      except Exception as e:
          log.exception(e)
          raise HTTPException(
              status_code=status.HTTP_400_BAD_REQUEST,
              detail=ERROR_MESSAGES.DEFAULT(e),
          )

      # Checked outside the try block so this error is not caught and re-wrapped
      # by the generic handler above.
      if not file_item:
          raise HTTPException(
              status_code=status.HTTP_400_BAD_REQUEST,
              detail=ERROR_MESSAGES.DEFAULT("Error uploading file"),
          )
      return file_item
  110. ############################
  111. # List Files
  112. ############################
  @router.get("/", response_model=list[FileModelResponse])
  async def list_files(user=Depends(get_verified_user)):
      """Return every file for an admin, otherwise only the caller's own files."""
      if user.role != "admin":
          return Files.get_files_by_user_id(user.id)
      return Files.get_files()
  120. ############################
  121. # Delete All Files
  122. ############################
  @router.delete("/all")
  async def delete_all_files(user=Depends(get_admin_user)):
      """Delete all file records, then all stored files (admin only).

      Returns:
          A confirmation message when both deletions succeed.

      Raises:
          HTTPException: 400 when the record deletion reports failure or the
              storage deletion raises.
      """
      # Records go first; storage is only touched once that succeeded.
      if not Files.delete_all_files():
          raise HTTPException(
              status_code=status.HTTP_400_BAD_REQUEST,
              detail=ERROR_MESSAGES.DEFAULT("Error deleting files"),
          )

      try:
          Storage.delete_all_files()
      except Exception as e:
          log.exception(e)
          log.error("Error deleting files")
          raise HTTPException(
              status_code=status.HTTP_400_BAD_REQUEST,
              detail=ERROR_MESSAGES.DEFAULT("Error deleting files"),
          )

      return {"message": "All files deleted successfully"}
  142. ############################
  143. # Get File By Id
  144. ############################
  @router.get("/{id}", response_model=Optional[FileModel])
  async def get_file_by_id(id: str, user=Depends(get_verified_user)):
      """Return the file record ``id`` if the caller owns it or is an admin.

      Raises:
          HTTPException: 404 when the record is missing or belongs to someone
              else and the caller is not an admin.
      """
      record = Files.get_file_by_id(id)
      if not record or (record.user_id != user.id and user.role != "admin"):
          raise HTTPException(
              status_code=status.HTTP_404_NOT_FOUND,
              detail=ERROR_MESSAGES.NOT_FOUND,
          )
      return record
  155. ############################
  156. # Get File Data Content By Id
  157. ############################
  @router.get("/{id}/data/content")
  async def get_file_data_content_by_id(id: str, user=Depends(get_verified_user)):
      """Return the ``content`` entry of the file's ``data`` (empty string if absent).

      Raises:
          HTTPException: 404 when the record is missing or the caller is neither
              its owner nor an admin.
      """
      record = Files.get_file_by_id(id)
      if not record or (record.user_id != user.id and user.role != "admin"):
          raise HTTPException(
              status_code=status.HTTP_404_NOT_FOUND,
              detail=ERROR_MESSAGES.NOT_FOUND,
          )
      text = record.data.get("content", "")
      return {"content": text}
  168. ############################
  169. # Update File Data Content By Id
  170. ############################
  # Request body accepted by the "/{id}/data/content/update" endpoint.
  class ContentForm(BaseModel):
      # New text for the file; passed to process_file as the file's content.
      content: str
  @router.post("/{id}/data/content/update")
  async def update_file_data_content_by_id(
      request: Request, id: str, form_data: ContentForm, user=Depends(get_verified_user)
  ):
      """Re-process file ``id`` with new text and return its resulting content.

      A processing failure is logged but not raised; in that case the content
      of the record as loaded before processing is returned.

      Raises:
          HTTPException: 404 when the record is missing or the caller is neither
              its owner nor an admin.
      """
      record = Files.get_file_by_id(id)
      if not record or (record.user_id != user.id and user.role != "admin"):
          raise HTTPException(
              status_code=status.HTTP_404_NOT_FOUND,
              detail=ERROR_MESSAGES.NOT_FOUND,
          )

      try:
          update = ProcessFileForm(file_id=id, content=form_data.content)
          process_file(request, update, user=user)
          # Load the record again after processing.
          record = Files.get_file_by_id(id=id)
      except Exception as e:
          log.exception(e)
          log.error(f"Error processing file: {record.id}")

      return {"content": record.data.get("content", "")}
  195. ############################
  196. # Get File Content By Id
  197. ############################
  @router.get("/{id}/content")
  async def get_file_content_by_id(
      id: str, user=Depends(get_verified_user), attachment: bool = Query(False)
  ):
      """Serve the stored file ``id`` to its owner or an admin.

      Content-Disposition rules:
        * ``attachment=True``: always sent as an attachment.
        * PDF (by content type or ``.pdf`` suffix): sent inline as
          ``application/pdf``.
        * ``text/plain``: no Content-Disposition header.
        * anything else: sent as an attachment.

      Raises:
          HTTPException: 404 when the record is missing, the caller may not
              access it, or the resolved path is not a file; 400 when resolving
              or preparing the file raises any other error.
      """
      file = Files.get_file_by_id(id)
      if not file or (file.user_id != user.id and user.role != "admin"):
          raise HTTPException(
              status_code=status.HTTP_404_NOT_FOUND,
              detail=ERROR_MESSAGES.NOT_FOUND,
          )

      try:
          file_path = Path(Storage.get_file(file.path))
          if not file_path.is_file():
              raise HTTPException(
                  status_code=status.HTTP_404_NOT_FOUND,
                  detail=ERROR_MESSAGES.NOT_FOUND,
              )

          # Handle Unicode filenames (percent-encoded for the RFC 5987 filename* form).
          filename = file.meta.get("name", file.filename)
          encoded_filename = quote(filename)
          content_type = file.meta.get("content_type")

          headers = {}
          if attachment:
              headers["Content-Disposition"] = (
                  f"attachment; filename*=UTF-8''{encoded_filename}"
              )
          elif content_type == "application/pdf" or filename.lower().endswith(".pdf"):
              headers["Content-Disposition"] = (
                  f"inline; filename*=UTF-8''{encoded_filename}"
              )
              content_type = "application/pdf"
          elif content_type != "text/plain":
              headers["Content-Disposition"] = (
                  f"attachment; filename*=UTF-8''{encoded_filename}"
              )

          return FileResponse(file_path, headers=headers, media_type=content_type)
      except HTTPException:
          # Keep the 404 above (and any other HTTP error) intact instead of
          # letting the generic handler turn it into a 400.
          raise
      except Exception as e:
          log.exception(e)
          log.error("Error getting file content")
          raise HTTPException(
              status_code=status.HTTP_400_BAD_REQUEST,
              detail=ERROR_MESSAGES.DEFAULT("Error getting file content"),
          )
  @router.get("/{id}/content/html")
  async def get_html_file_content_by_id(id: str, user=Depends(get_verified_user)):
      """Serve the stored file ``id`` as a plain ``FileResponse``.

      No Content-Disposition header or explicit media type is set here.

      Raises:
          HTTPException: 404 when the record is missing, the caller is neither
              its owner nor an admin, or the resolved path is not a file; 400
              when resolving the file raises any other error.
      """
      file = Files.get_file_by_id(id)
      if not file or (file.user_id != user.id and user.role != "admin"):
          raise HTTPException(
              status_code=status.HTTP_404_NOT_FOUND,
              detail=ERROR_MESSAGES.NOT_FOUND,
          )

      try:
          file_path = Path(Storage.get_file(file.path))
          if not file_path.is_file():
              raise HTTPException(
                  status_code=status.HTTP_404_NOT_FOUND,
                  detail=ERROR_MESSAGES.NOT_FOUND,
              )
          log.info(f"file_path: {file_path}")
          return FileResponse(file_path)
      except HTTPException:
          # Keep the 404 above (and any other HTTP error) intact instead of
          # letting the generic handler turn it into a 400.
          raise
      except Exception as e:
          log.exception(e)
          log.error("Error getting file content")
          raise HTTPException(
              status_code=status.HTTP_400_BAD_REQUEST,
              detail=ERROR_MESSAGES.DEFAULT("Error getting file content"),
          )
  @router.get("/{id}/content/{file_name}")
  async def get_file_content_by_id(id: str, user=Depends(get_verified_user)):
      """Download file ``id`` as an attachment, falling back to its stored text.

      The ``file_name`` path segment is not a parameter of this function; the
      download name comes from the file record.

      When the record has a path, the stored file is returned. Otherwise the
      ``content`` entry of the record's ``data`` is streamed as ``text/plain``.

      NOTE(review): this function has the same name as the handler for
      ``/{id}/content`` and rebinds that module-level name. Renaming it would
      change what importers of the name receive, so it is left as is.

      Raises:
          HTTPException: 404 when the record is missing, the caller is neither
              its owner nor an admin, or the resolved path is not a file.
      """
      file = Files.get_file_by_id(id)
      if not file or (file.user_id != user.id and user.role != "admin"):
          raise HTTPException(
              status_code=status.HTTP_404_NOT_FOUND,
              detail=ERROR_MESSAGES.NOT_FOUND,
          )

      # Handle Unicode filenames (percent-encoded for the RFC 5987 filename* form).
      filename = file.meta.get("name", file.filename)
      encoded_filename = quote(filename)
      headers = {
          "Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}"
      }

      if file.path:
          file_path = Path(Storage.get_file(file.path))
          if file_path.is_file():
              return FileResponse(file_path, headers=headers)
          raise HTTPException(
              status_code=status.HTTP_404_NOT_FOUND,
              detail=ERROR_MESSAGES.NOT_FOUND,
          )

      # File path doesn't exist, return the content as .txt if possible.
      # The text is read from ``file.data``, as the /data/content endpoints do.
      # NOTE(review): this previously read ``file.content``; confirm against
      # FileModel that ``data`` is the right attribute.
      file_content = (file.data or {}).get("content", "")

      # Create a generator that encodes the file content
      def generator():
          yield file_content.encode("utf-8")

      return StreamingResponse(
          generator(),
          media_type="text/plain",
          headers=headers,
      )
  317. ############################
  318. # Delete File By Id
  319. ############################
  @router.delete("/{id}")
  async def delete_file_by_id(id: str, user=Depends(get_verified_user)):
      """Delete the record of file ``id`` and then its stored file.

      Returns:
          A confirmation message when both deletions succeed.

      Raises:
          HTTPException: 404 when the record is missing or the caller is neither
              its owner nor an admin; 400 when the record deletion reports
              failure or the storage deletion raises.
      """
      record = Files.get_file_by_id(id)
      if not record or (record.user_id != user.id and user.role != "admin"):
          raise HTTPException(
              status_code=status.HTTP_404_NOT_FOUND,
              detail=ERROR_MESSAGES.NOT_FOUND,
          )

      # We should add Chroma cleanup here
      if not Files.delete_file_by_id(id):
          raise HTTPException(
              status_code=status.HTTP_400_BAD_REQUEST,
              detail=ERROR_MESSAGES.DEFAULT("Error deleting file"),
          )

      try:
          Storage.delete_file(record.path)
      except Exception as e:
          log.exception(e)
          log.error("Error deleting files")
          raise HTTPException(
              status_code=status.HTTP_400_BAD_REQUEST,
              detail=ERROR_MESSAGES.DEFAULT("Error deleting files"),
          )

      return {"message": "File deleted successfully"}