浏览代码

fix: handle json output format correctly

Hisma 4 月之前
父节点
当前提交
e12a79c0e2
共有 1 个文件被更改,包括 11 次插入和 11 次删除
  1. 11 11
      backend/open_webui/retrieval/loaders/datalab_marker_loader.py

+ 11 - 11
backend/open_webui/retrieval/loaders/datalab_marker_loader.py

@@ -92,13 +92,7 @@ class DatalabMarkerLoader:
             "output_format": self.output_format,
         }
 
-        request_params = {
-            "filename": filename,
-            "mime_type": mime_type,
-            **form_data,
-        }
-
-        log.info(f"Datalab Marker POST request parameters: {request_params}")
+        log.info(f"Datalab Marker POST request parameters: {{'filename': '{filename}', 'mime_type': '{mime_type}', **{form_data}}}")
 
         try:
             with open(self.file_path, "rb") as f:
@@ -138,7 +132,6 @@ class DatalabMarkerLoader:
             success_val = poll_result.get("success")
 
             if status_val == "complete":
-                # Log key details
                 summary = {
                     k: poll_result.get(k)
                     for k in ("status", "output_format", "success", "error", "page_count", "total_cost")
@@ -158,11 +151,18 @@ class DatalabMarkerLoader:
             raise HTTPException(status.HTTP_400_BAD_REQUEST, detail=f"Final processing failed: {error_msg}")
 
         content_key = self.output_format.lower()
-        full_text = poll_result.get(content_key, "").strip()
+        raw_content = poll_result.get(content_key)
+
+        if content_key == "json":
+            full_text = json.dumps(raw_content, indent=2)
+        elif content_key in {"markdown", "html"}:
+            full_text = str(raw_content).strip()
+        else:
+            raise HTTPException(status.HTTP_400_BAD_REQUEST, detail=f"Unsupported output format: {self.output_format}")
+
         if not full_text:
             raise HTTPException(status.HTTP_400_BAD_REQUEST, detail="Datalab Marker returned empty content")
 
-        # Write output to uploads/marker_output
         marker_output_dir = os.path.join("/app/backend/data/uploads", "marker_output")
         os.makedirs(marker_output_dir, exist_ok=True)
 
@@ -197,4 +197,4 @@ class DatalabMarkerLoader:
             elif v is None:
                 metadata[k] = ""
 
-        return [Document(page_content=full_text, metadata=metadata)]
+        return [Document(page_content=full_text, metadata=metadata)]