1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283 |
- import requests
- import logging, os
- from typing import Iterator, List, Union
- from langchain_core.document_loaders import BaseLoader
- from langchain_core.documents import Document
- from open_webui.env import SRC_LOG_LEVELS
# Module-level logger for this loader. Its threshold is read from the "RAG"
# entry of SRC_LOG_LEVELS (imported from open_webui.env).
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
class ExternalDocumentLoader(BaseLoader):
    """Load documents by handing a local file to an external HTTP service.

    The raw bytes of ``file_path`` are sent with ``PUT {url}/process``. The
    response body must be JSON: either a single object or a list of objects,
    each read through its ``page_content`` and ``metadata`` keys.
    """

    def __init__(
        self,
        file_path,
        url: str,
        api_key: str,
        mime_type=None,
        **kwargs,
    ) -> None:
        """Store the connection settings; no I/O happens here.

        Args:
            file_path: Path of the file whose bytes are uploaded by ``load``.
            url: Base URL of the service; one trailing ``/`` is tolerated.
            api_key: Bearer token. When ``None`` no ``Authorization`` header
                is sent.
            mime_type: Value for the ``Content-Type`` header. When ``None``
                the header is not set by this class.
            **kwargs: Accepted and ignored.
        """
        self.url = url
        self.api_key = api_key
        self.file_path = file_path
        self.mime_type = mime_type

    def load(self) -> List[Document]:
        """Upload the file and convert the JSON reply into documents.

        Returns:
            One ``Document`` for an object reply, or one per item for a list
            reply.

        Raises:
            OSError: If ``file_path`` cannot be opened or read.
            Exception: If the request fails, the status is not OK, the body
                is not valid JSON, the body is empty/falsy, or its shape is
                neither an object nor a list of objects.
        """
        with open(self.file_path, "rb") as f:
            data = f.read()

        headers = self._build_headers()

        # Drop a single trailing slash so "/process" is not doubled up.
        url = self.url[:-1] if self.url.endswith("/") else self.url

        try:
            response = requests.put(f"{url}/process", data=data, headers=headers)
        except Exception as e:
            log.error(f"Error connecting to endpoint: {e}")
            # Chain the cause so the original traceback is not lost.
            raise Exception(f"Error connecting to endpoint: {e}") from e

        if not response.ok:
            raise Exception(
                f"Error loading document: {response.status_code} {response.text}"
            )

        try:
            response_data = response.json()
        except ValueError as e:
            # A 2xx reply whose body is not JSON is a parse failure, not a
            # transport failure; report it with the loader's own message.
            raise Exception("Error loading document: Unable to parse content") from e

        if not response_data:
            raise Exception("Error loading document: No content returned")

        if isinstance(response_data, dict):
            return [self._to_document(response_data)]
        if isinstance(response_data, list):
            return [self._to_document(item) for item in response_data]
        raise Exception("Error loading document: Unable to parse content")

    def _build_headers(self) -> dict:
        """Assemble the request headers from the stored settings."""
        # Local import keeps this class self-contained; the module's import
        # block does not provide urllib.parse.
        from urllib.parse import quote

        headers = {}
        if self.mime_type is not None:
            headers["Content-Type"] = self.mime_type
        if self.api_key is not None:
            headers["Authorization"] = f"Bearer {self.api_key}"

        # X-Filename is best effort: if no name can be derived (file_path is
        # not path-like, e.g. an integer file descriptor) the header is
        # simply left out.
        try:
            filename = os.path.basename(self.file_path)
        except TypeError:
            return headers

        if isinstance(filename, str):
            try:
                filename.encode("latin-1")
            except UnicodeEncodeError:
                # Header values given as str are encoded as latin-1 when the
                # request is sent, so such a name would make the request
                # fail. Percent-encode it (UTF-8) instead; names that were
                # already sendable are passed through unchanged.
                filename = quote(filename)

        headers["X-Filename"] = filename
        return headers

    @staticmethod
    def _to_document(payload) -> Document:
        """Turn one JSON object from the service into a ``Document``."""
        if not isinstance(payload, dict):
            raise Exception("Error loading document: Unable to parse content")
        return Document(
            page_content=payload.get("page_content"),
            # A reply without metadata should still load; fall back to an
            # empty dict rather than handing None to Document.
            metadata=payload.get("metadata") or {},
        )
|