|
|
@@ -30,10 +30,9 @@ class MistralLoader:
|
|
|
- Enhanced error handling with retryable error classification
|
|
|
"""
|
|
|
|
|
|
- BASE_API_URL = "https://api.mistral.ai/v1"
|
|
|
-
|
|
|
def __init__(
|
|
|
self,
|
|
|
+ base_url: str,
|
|
|
api_key: str,
|
|
|
file_path: str,
|
|
|
timeout: int = 300, # 5 minutes default
|
|
|
@@ -55,6 +54,9 @@ class MistralLoader:
|
|
|
if not os.path.exists(file_path):
|
|
|
raise FileNotFoundError(f"File not found at {file_path}")
|
|
|
|
|
|
+ self.base_url = (
|
|
|
+ base_url.rstrip("/") if base_url else "https://api.mistral.ai/v1"
|
|
|
+ )
|
|
|
self.api_key = api_key
|
|
|
self.file_path = file_path
|
|
|
self.timeout = timeout
|
|
|
@@ -240,7 +242,7 @@ class MistralLoader:
|
|
|
in a context manager to minimize memory usage duration.
|
|
|
"""
|
|
|
log.info("Uploading file to Mistral API")
|
|
|
- url = f"{self.BASE_API_URL}/files"
|
|
|
+ url = f"{self.base_url}/files"
|
|
|
|
|
|
def upload_request():
|
|
|
# MEMORY OPTIMIZATION: Use context manager to minimize file handle lifetime
|
|
|
@@ -275,7 +277,7 @@ class MistralLoader:
|
|
|
|
|
|
async def _upload_file_async(self, session: aiohttp.ClientSession) -> str:
|
|
|
"""Async file upload with streaming for better memory efficiency."""
|
|
|
- url = f"{self.BASE_API_URL}/files"
|
|
|
+ url = f"{self.base_url}/files"
|
|
|
|
|
|
async def upload_request():
|
|
|
# Create multipart writer for streaming upload
|
|
|
@@ -321,7 +323,7 @@ class MistralLoader:
|
|
|
def _get_signed_url(self, file_id: str) -> str:
|
|
|
"""Retrieves a temporary signed URL for the uploaded file (sync version)."""
|
|
|
log.info(f"Getting signed URL for file ID: {file_id}")
|
|
|
- url = f"{self.BASE_API_URL}/files/{file_id}/url"
|
|
|
+ url = f"{self.base_url}/files/{file_id}/url"
|
|
|
params = {"expiry": 1}
|
|
|
signed_url_headers = {**self.headers, "Accept": "application/json"}
|
|
|
|
|
|
@@ -346,7 +348,7 @@ class MistralLoader:
|
|
|
self, session: aiohttp.ClientSession, file_id: str
|
|
|
) -> str:
|
|
|
"""Async signed URL retrieval."""
|
|
|
- url = f"{self.BASE_API_URL}/files/{file_id}/url"
|
|
|
+ url = f"{self.base_url}/files/{file_id}/url"
|
|
|
params = {"expiry": 1}
|
|
|
|
|
|
headers = {**self.headers, "Accept": "application/json"}
|
|
|
@@ -373,7 +375,7 @@ class MistralLoader:
|
|
|
def _process_ocr(self, signed_url: str) -> Dict[str, Any]:
|
|
|
"""Sends the signed URL to the OCR endpoint for processing (sync version)."""
|
|
|
log.info("Processing OCR via Mistral API")
|
|
|
- url = f"{self.BASE_API_URL}/ocr"
|
|
|
+ url = f"{self.base_url}/ocr"
|
|
|
ocr_headers = {
|
|
|
**self.headers,
|
|
|
"Content-Type": "application/json",
|
|
|
@@ -407,7 +409,7 @@ class MistralLoader:
|
|
|
self, session: aiohttp.ClientSession, signed_url: str
|
|
|
) -> Dict[str, Any]:
|
|
|
"""Async OCR processing with timing metrics."""
|
|
|
- url = f"{self.BASE_API_URL}/ocr"
|
|
|
+ url = f"{self.base_url}/ocr"
|
|
|
|
|
|
headers = {
|
|
|
**self.headers,
|
|
|
@@ -446,7 +448,7 @@ class MistralLoader:
|
|
|
def _delete_file(self, file_id: str) -> None:
|
|
|
"""Deletes the file from Mistral storage (sync version)."""
|
|
|
log.info(f"Deleting uploaded file ID: {file_id}")
|
|
|
- url = f"{self.BASE_API_URL}/files/{file_id}"
|
|
|
+ url = f"{self.base_url}/files/{file_id}"
|
|
|
|
|
|
try:
|
|
|
response = requests.delete(
|
|
|
@@ -467,7 +469,7 @@ class MistralLoader:
|
|
|
async def delete_request():
|
|
|
self._debug_log(f"Deleting file ID: {file_id}")
|
|
|
async with session.delete(
|
|
|
- url=f"{self.BASE_API_URL}/files/{file_id}",
|
|
|
+ url=f"{self.base_url}/files/{file_id}",
|
|
|
headers=self.headers,
|
|
|
timeout=aiohttp.ClientTimeout(
|
|
|
total=self.cleanup_timeout
|