external_document.py 2.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879
  1. import requests
  2. import logging, os
  3. from typing import Iterator, List, Union
  4. from langchain_core.document_loaders import BaseLoader
  5. from langchain_core.documents import Document
  6. from open_webui.env import SRC_LOG_LEVELS
# Module-level logger named after this module; its level is taken from the
# "RAG" entry of SRC_LOG_LEVELS (imported from open_webui.env).
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
  9. class ExternalDocumentLoader(BaseLoader):
  10. def __init__(
  11. self,
  12. file_path,
  13. url: str,
  14. api_key: str,
  15. mime_type=None,
  16. **kwargs,
  17. ) -> None:
  18. self.url = url
  19. self.api_key = api_key
  20. self.file_path = file_path
  21. self.mime_type = mime_type
  22. def load(self) -> List[Document]:
  23. with open(self.file_path, "rb") as f:
  24. data = f.read()
  25. headers = {}
  26. if self.mime_type is not None:
  27. headers["Content-Type"] = self.mime_type
  28. if self.api_key is not None:
  29. headers["Authorization"] = f"Bearer {self.api_key}"
  30. try:
  31. headers["X-Filename"] = os.path.basename(self.file_path)
  32. except:
  33. pass
  34. url = self.url
  35. if url.endswith("/"):
  36. url = url[:-1]
  37. try:
  38. response = requests.put(f"{url}/process", data=data, headers=headers)
  39. except Exception as e:
  40. log.error(f"Error connecting to endpoint: {e}")
  41. raise Exception(f"Error connecting to endpoint: {e}")
  42. if response.ok:
  43. response_data = response.json()
  44. if response_data:
  45. if isinstance(response_data, dict):
  46. return [
  47. Document(
  48. page_content=response_data.get("page_content"),
  49. metadata=response_data.get("metadata"),
  50. )
  51. ]
  52. elif isinstance(response_data, list):
  53. documents = []
  54. for document in response_data:
  55. documents.append(Document(
  56. page_content=document.get("page_content"),
  57. metadata=document.get("metadata"),
  58. ))
  59. return documents
  60. else:
  61. raise Exception("Error loading document: Unable to parse content")
  62. else:
  63. raise Exception("Error loading document: No content returned")
  64. else:
  65. raise Exception(f"Error loading document: {response.status_code} {response.text}")