external_document.py 2.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. import requests
  2. import logging, os
  3. from typing import Iterator, List, Union
  4. from langchain_core.document_loaders import BaseLoader
  5. from langchain_core.documents import Document
  6. from open_webui.env import SRC_LOG_LEVELS
  7. log = logging.getLogger(__name__)
  8. log.setLevel(SRC_LOG_LEVELS["RAG"])
  9. class ExternalDocumentLoader(BaseLoader):
  10. def __init__(
  11. self,
  12. file_path,
  13. url: str,
  14. api_key: str,
  15. mime_type=None,
  16. **kwargs,
  17. ) -> None:
  18. self.url = url
  19. self.api_key = api_key
  20. self.file_path = file_path
  21. self.mime_type = mime_type
  22. def load(self) -> List[Document]:
  23. with open(self.file_path, "rb") as f:
  24. data = f.read()
  25. headers = {}
  26. if self.mime_type is not None:
  27. headers["Content-Type"] = self.mime_type
  28. if self.api_key is not None:
  29. headers["Authorization"] = f"Bearer {self.api_key}"
  30. try:
  31. headers["X-Filename"] = os.path.basename(self.file_path)
  32. except:
  33. pass
  34. url = self.url
  35. if url.endswith("/"):
  36. url = url[:-1]
  37. try:
  38. response = requests.put(f"{url}/process", data=data, headers=headers)
  39. except Exception as e:
  40. log.error(f"Error connecting to endpoint: {e}")
  41. raise Exception(f"Error connecting to endpoint: {e}")
  42. if response.ok:
  43. response_data = response.json()
  44. if response_data:
  45. if isinstance(response_data, dict):
  46. return [
  47. Document(
  48. page_content=response_data.get("page_content"),
  49. metadata=response_data.get("metadata"),
  50. )
  51. ]
  52. elif isinstance(response_data, list):
  53. documents = []
  54. for document in response_data:
  55. documents.append(
  56. Document(
  57. page_content=document.get("page_content"),
  58. metadata=document.get("metadata"),
  59. )
  60. )
  61. return documents
  62. else:
  63. raise Exception("Error loading document: Unable to parse content")
  64. else:
  65. raise Exception("Error loading document: No content returned")
  66. else:
  67. raise Exception(
  68. f"Error loading document: {response.status_code} {response.text}"
  69. )