external_document.py 1.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. import requests
  2. import logging
  3. from typing import Iterator, List, Union
  4. from langchain_core.document_loaders import BaseLoader
  5. from langchain_core.documents import Document
  6. from open_webui.env import SRC_LOG_LEVELS
# Module-level logger; its level is taken from the "RAG" entry of SRC_LOG_LEVELS.
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
  9. class ExternalDocumentLoader(BaseLoader):
  10. def __init__(
  11. self,
  12. file_path,
  13. url: str,
  14. api_key: str,
  15. mime_type=None,
  16. **kwargs,
  17. ) -> None:
  18. self.url = url
  19. self.api_key = api_key
  20. self.file_path = file_path
  21. self.mime_type = mime_type
  22. def load(self) -> list[Document]:
  23. with open(self.file_path, "rb") as f:
  24. data = f.read()
  25. headers = {}
  26. if self.mime_type is not None:
  27. headers["Content-Type"] = self.mime_type
  28. if self.api_key is not None:
  29. headers["Authorization"] = f"Bearer {self.api_key}"
  30. url = self.url
  31. if url.endswith("/"):
  32. url = url[:-1]
  33. r = requests.put(f"{url}/process", data=data, headers=headers)
  34. if r.ok:
  35. res = r.json()
  36. if res:
  37. return [
  38. Document(
  39. page_content=res.get("page_content"),
  40. metadata=res.get("metadata"),
  41. )
  42. ]
  43. else:
  44. raise Exception("Error loading document: No content returned")
  45. else:
  46. raise Exception(f"Error loading document: {r.status_code} {r.text}")