files.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395
  1. import logging
  2. import os
  3. import uuid
  4. from pathlib import Path
  5. from typing import Optional
  6. from urllib.parse import quote
  7. from fastapi import APIRouter, Depends, File, HTTPException, Request, UploadFile, status, Query
  8. from fastapi.responses import FileResponse, StreamingResponse
  9. from open_webui.constants import ERROR_MESSAGES
  10. from open_webui.env import SRC_LOG_LEVELS
  11. from open_webui.models.files import (
  12. FileForm,
  13. FileModel,
  14. FileModelResponse,
  15. Files,
  16. )
  17. from open_webui.routers.retrieval import ProcessFileForm, process_file
  18. from open_webui.routers.audio import transcribe
  19. from open_webui.storage.provider import Storage
  20. from open_webui.utils.auth import get_admin_user, get_verified_user
  21. from pydantic import BaseModel
  22. log = logging.getLogger(__name__)
  23. log.setLevel(SRC_LOG_LEVELS["MODELS"])
  24. router = APIRouter()
  25. ############################
  26. # Upload File
  27. ############################
  28. @router.post("/", response_model=FileModelResponse)
  29. def upload_file(
  30. request: Request,
  31. file: UploadFile = File(...),
  32. user=Depends(get_verified_user),
  33. file_metadata: dict = {},
  34. ingest_file: bool = Query(True)
  35. ):
  36. log.info(f"file.content_type: {file.content_type}")
  37. try:
  38. unsanitized_filename = file.filename
  39. filename = os.path.basename(unsanitized_filename)
  40. # replace filename with uuid
  41. id = str(uuid.uuid4())
  42. name = filename
  43. filename = f"{id}_{filename}"
  44. contents, file_path = Storage.upload_file(file.file, filename)
  45. file_item = Files.insert_new_file(
  46. user.id,
  47. FileForm(
  48. **{
  49. "id": id,
  50. "filename": name,
  51. "path": file_path,
  52. "meta": {
  53. "name": name,
  54. "content_type": file.content_type,
  55. "size": len(contents),
  56. "data": file_metadata,
  57. },
  58. }
  59. ),
  60. )
  61. if ingest_file:
  62. try:
  63. if file.content_type in [
  64. "audio/mpeg",
  65. "audio/wav",
  66. "audio/ogg",
  67. "audio/x-m4a",
  68. ]:
  69. file_path = Storage.get_file(file_path)
  70. result = transcribe(request, file_path)
  71. process_file(
  72. request,
  73. ProcessFileForm(file_id=id, content=result.get("text", "")),
  74. user=user,
  75. )
  76. elif file.content_type not in ["image/png", "image/jpeg", "image/gif"]:
  77. process_file(request, ProcessFileForm(file_id=id), user=user)
  78. file_item = Files.get_file_by_id(id=id)
  79. except Exception as e:
  80. log.exception(e)
  81. log.error(f"Error processing file: {file_item.id}")
  82. file_item = FileModelResponse(
  83. **{
  84. **file_item.model_dump(),
  85. "error": str(e.detail) if hasattr(e, "detail") else str(e),
  86. }
  87. )
  88. if file_item:
  89. return file_item
  90. else:
  91. raise HTTPException(
  92. status_code=status.HTTP_400_BAD_REQUEST,
  93. detail=ERROR_MESSAGES.DEFAULT("Error uploading file"),
  94. )
  95. except Exception as e:
  96. log.exception(e)
  97. raise HTTPException(
  98. status_code=status.HTTP_400_BAD_REQUEST,
  99. detail=ERROR_MESSAGES.DEFAULT(e),
  100. )
  101. ############################
  102. # List Files
  103. ############################
  104. @router.get("/", response_model=list[FileModelResponse])
  105. async def list_files(user=Depends(get_verified_user)):
  106. if user.role == "admin":
  107. files = Files.get_files()
  108. else:
  109. files = Files.get_files_by_user_id(user.id)
  110. return files
  111. ############################
  112. # Delete All Files
  113. ############################
  114. @router.delete("/all")
  115. async def delete_all_files(user=Depends(get_admin_user)):
  116. result = Files.delete_all_files()
  117. if result:
  118. try:
  119. Storage.delete_all_files()
  120. except Exception as e:
  121. log.exception(e)
  122. log.error("Error deleting files")
  123. raise HTTPException(
  124. status_code=status.HTTP_400_BAD_REQUEST,
  125. detail=ERROR_MESSAGES.DEFAULT("Error deleting files"),
  126. )
  127. return {"message": "All files deleted successfully"}
  128. else:
  129. raise HTTPException(
  130. status_code=status.HTTP_400_BAD_REQUEST,
  131. detail=ERROR_MESSAGES.DEFAULT("Error deleting files"),
  132. )
  133. ############################
  134. # Get File By Id
  135. ############################
  136. @router.get("/{id}", response_model=Optional[FileModel])
  137. async def get_file_by_id(id: str, user=Depends(get_verified_user)):
  138. file = Files.get_file_by_id(id)
  139. if file and (file.user_id == user.id or user.role == "admin"):
  140. return file
  141. else:
  142. raise HTTPException(
  143. status_code=status.HTTP_404_NOT_FOUND,
  144. detail=ERROR_MESSAGES.NOT_FOUND,
  145. )
  146. ############################
  147. # Get File Data Content By Id
  148. ############################
  149. @router.get("/{id}/data/content")
  150. async def get_file_data_content_by_id(id: str, user=Depends(get_verified_user)):
  151. file = Files.get_file_by_id(id)
  152. if file and (file.user_id == user.id or user.role == "admin"):
  153. return {"content": file.data.get("content", "")}
  154. else:
  155. raise HTTPException(
  156. status_code=status.HTTP_404_NOT_FOUND,
  157. detail=ERROR_MESSAGES.NOT_FOUND,
  158. )
  159. ############################
  160. # Update File Data Content By Id
  161. ############################
  162. class ContentForm(BaseModel):
  163. content: str
  164. @router.post("/{id}/data/content/update")
  165. async def update_file_data_content_by_id(
  166. request: Request, id: str, form_data: ContentForm, user=Depends(get_verified_user)
  167. ):
  168. file = Files.get_file_by_id(id)
  169. if file and (file.user_id == user.id or user.role == "admin"):
  170. try:
  171. process_file(
  172. request,
  173. ProcessFileForm(file_id=id, content=form_data.content),
  174. user=user,
  175. )
  176. file = Files.get_file_by_id(id=id)
  177. except Exception as e:
  178. log.exception(e)
  179. log.error(f"Error processing file: {file.id}")
  180. return {"content": file.data.get("content", "")}
  181. else:
  182. raise HTTPException(
  183. status_code=status.HTTP_404_NOT_FOUND,
  184. detail=ERROR_MESSAGES.NOT_FOUND,
  185. )
  186. ############################
  187. # Get File Content By Id
  188. ############################
  189. @router.get("/{id}/content")
  190. async def get_file_content_by_id(id: str, user=Depends(get_verified_user), as_attachment: bool = Query(False)):
  191. file = Files.get_file_by_id(id)
  192. if file and (file.user_id == user.id or user.role == "admin"):
  193. try:
  194. file_path = Storage.get_file(file.path)
  195. file_path = Path(file_path)
  196. # Check if the file already exists in the cache
  197. if file_path.is_file():
  198. # Handle Unicode filenames
  199. filename = file.meta.get("name", file.filename)
  200. encoded_filename = quote(filename) # RFC5987 encoding
  201. content_type = file.meta.get("content_type")
  202. filename = file.meta.get("name", file.filename)
  203. encoded_filename = quote(filename)
  204. headers = {}
  205. if as_attachment:
  206. headers["Content-Disposition"] = (
  207. f"attachment; filename*=UTF-8''{encoded_filename}"
  208. )
  209. else:
  210. if content_type == "application/pdf" or filename.lower().endswith(".pdf"):
  211. headers["Content-Disposition"] = (
  212. f"inline; filename*=UTF-8''{encoded_filename}"
  213. )
  214. content_type = "application/pdf"
  215. elif content_type != "text/plain":
  216. headers["Content-Disposition"] = (
  217. f"attachment; filename*=UTF-8''{encoded_filename}"
  218. )
  219. return FileResponse(file_path, headers=headers, media_type=content_type)
  220. else:
  221. raise HTTPException(
  222. status_code=status.HTTP_404_NOT_FOUND,
  223. detail=ERROR_MESSAGES.NOT_FOUND,
  224. )
  225. except Exception as e:
  226. log.exception(e)
  227. log.error("Error getting file content")
  228. raise HTTPException(
  229. status_code=status.HTTP_400_BAD_REQUEST,
  230. detail=ERROR_MESSAGES.DEFAULT("Error getting file content"),
  231. )
  232. else:
  233. raise HTTPException(
  234. status_code=status.HTTP_404_NOT_FOUND,
  235. detail=ERROR_MESSAGES.NOT_FOUND,
  236. )
  237. @router.get("/{id}/content/html")
  238. async def get_html_file_content_by_id(id: str, user=Depends(get_verified_user)):
  239. file = Files.get_file_by_id(id)
  240. if file and (file.user_id == user.id or user.role == "admin"):
  241. try:
  242. file_path = Storage.get_file(file.path)
  243. file_path = Path(file_path)
  244. # Check if the file already exists in the cache
  245. if file_path.is_file():
  246. log.info(f"file_path: {file_path}")
  247. return FileResponse(file_path)
  248. else:
  249. raise HTTPException(
  250. status_code=status.HTTP_404_NOT_FOUND,
  251. detail=ERROR_MESSAGES.NOT_FOUND,
  252. )
  253. except Exception as e:
  254. log.exception(e)
  255. log.error("Error getting file content")
  256. raise HTTPException(
  257. status_code=status.HTTP_400_BAD_REQUEST,
  258. detail=ERROR_MESSAGES.DEFAULT("Error getting file content"),
  259. )
  260. else:
  261. raise HTTPException(
  262. status_code=status.HTTP_404_NOT_FOUND,
  263. detail=ERROR_MESSAGES.NOT_FOUND,
  264. )
  265. @router.get("/{id}/content/{file_name}")
  266. async def get_file_content_by_id(id: str, user=Depends(get_verified_user)):
  267. file = Files.get_file_by_id(id)
  268. if file and (file.user_id == user.id or user.role == "admin"):
  269. file_path = file.path
  270. # Handle Unicode filenames
  271. filename = file.meta.get("name", file.filename)
  272. encoded_filename = quote(filename) # RFC5987 encoding
  273. headers = {
  274. "Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}"
  275. }
  276. if file_path:
  277. file_path = Storage.get_file(file_path)
  278. file_path = Path(file_path)
  279. # Check if the file already exists in the cache
  280. if file_path.is_file():
  281. return FileResponse(file_path, headers=headers)
  282. else:
  283. raise HTTPException(
  284. status_code=status.HTTP_404_NOT_FOUND,
  285. detail=ERROR_MESSAGES.NOT_FOUND,
  286. )
  287. else:
  288. # File path doesn’t exist, return the content as .txt if possible
  289. file_content = file.content.get("content", "")
  290. file_name = file.filename
  291. # Create a generator that encodes the file content
  292. def generator():
  293. yield file_content.encode("utf-8")
  294. return StreamingResponse(
  295. generator(),
  296. media_type="text/plain",
  297. headers=headers,
  298. )
  299. else:
  300. raise HTTPException(
  301. status_code=status.HTTP_404_NOT_FOUND,
  302. detail=ERROR_MESSAGES.NOT_FOUND,
  303. )
  304. ############################
  305. # Delete File By Id
  306. ############################
  307. @router.delete("/{id}")
  308. async def delete_file_by_id(id: str, user=Depends(get_verified_user)):
  309. file = Files.get_file_by_id(id)
  310. if file and (file.user_id == user.id or user.role == "admin"):
  311. # We should add Chroma cleanup here
  312. result = Files.delete_file_by_id(id)
  313. if result:
  314. try:
  315. Storage.delete_file(file.path)
  316. except Exception as e:
  317. log.exception(e)
  318. log.error("Error deleting files")
  319. raise HTTPException(
  320. status_code=status.HTTP_400_BAD_REQUEST,
  321. detail=ERROR_MESSAGES.DEFAULT("Error deleting files"),
  322. )
  323. return {"message": "File deleted successfully"}
  324. else:
  325. raise HTTPException(
  326. status_code=status.HTTP_400_BAD_REQUEST,
  327. detail=ERROR_MESSAGES.DEFAULT("Error deleting file"),
  328. )
  329. else:
  330. raise HTTPException(
  331. status_code=status.HTTP_404_NOT_FOUND,
  332. detail=ERROR_MESSAGES.NOT_FOUND,
  333. )