12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758 |
- import requests
- import logging
- from typing import Iterator, List, Union
- from langchain_core.document_loaders import BaseLoader
- from langchain_core.documents import Document
- from open_webui.env import SRC_LOG_LEVELS
# Module-level logger; its level is taken from the "RAG" entry of SRC_LOG_LEVELS.
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
class ExternalDocumentLoader(BaseLoader):
    """Load a local file by handing it to an external processing service.

    The raw bytes of the file are sent with an HTTP PUT to ``{url}/process``
    and the JSON object in the response is turned into a single ``Document``.
    """

    def __init__(
        self,
        file_path,
        url: str,
        api_key: str,
        mime_type=None,
        **kwargs,
    ) -> None:
        """Store the request settings; no I/O happens here.

        Args:
            file_path: Path of the file whose bytes ``load`` uploads.
            url: Base URL of the service; ``/process`` is appended by ``load``.
            api_key: Sent as a ``Bearer`` token when it is not ``None``.
            mime_type: Sent as the ``Content-Type`` header when it is not
                ``None``.
            **kwargs: Accepted and ignored.
        """
        self.url = url
        self.api_key = api_key
        self.file_path = file_path
        self.mime_type = mime_type

    def load(self) -> list[Document]:
        """Upload the file and return the processed result as one document.

        Returns:
            A one-element list holding a ``Document`` built from the
            ``page_content`` and ``metadata`` keys of the JSON response.
            A missing or null ``metadata`` becomes an empty dict.

        Raises:
            OSError: If the file cannot be opened or read.
            Exception: If the service answers with a non-OK status, a body
                that is not valid JSON, an empty or non-object JSON payload,
                or a payload without ``page_content``.
        """
        with open(self.file_path, "rb") as f:
            data = f.read()

        headers = {}
        if self.mime_type is not None:
            headers["Content-Type"] = self.mime_type
        if self.api_key is not None:
            headers["Authorization"] = f"Bearer {self.api_key}"

        # Drop every trailing slash so "host/" and "host//" both end up as
        # "host/process".
        url = self.url.rstrip("/")

        # NOTE(review): no timeout is passed, so this call waits for as long
        # as the service takes to answer — confirm whether a limit is wanted.
        r = requests.put(f"{url}/process", data=data, headers=headers)
        if not r.ok:
            raise Exception(f"Error loading document: {r.status_code} {r.text}")

        try:
            res = r.json()
        except ValueError as err:
            # A 2xx reply with a non-JSON body would otherwise surface as a
            # bare decode error with no hint of where it came from.
            raise Exception(
                "Error loading document: Response is not valid JSON"
            ) from err

        if not res:
            raise Exception("Error loading document: No content returned")

        if not isinstance(res, dict):
            # Guard the .get() calls below against lists, strings, numbers.
            raise Exception(
                "Error loading document: Unexpected response type "
                f"{type(res).__name__}"
            )

        page_content = res.get("page_content")
        if page_content is None:
            raise Exception("Error loading document: No content returned")

        return [
            Document(
                page_content=page_content,
                metadata=res.get("metadata") or {},
            )
        ]
|