Ver Fonte

Extend the Docling configuration options (environment variables, admin API, and admin settings UI) to include:

* do_ocr
* force_ocr
* pdf_backend
* table_mode
* pipeline

as documented in the docling-serve usage guide: https://github.com/docling-project/docling-serve/blob/main/docs/usage.md

See https://github.com/open-webui/open-webui/issues/17148
Antonio Pisano há 4 semanas atrás
pai
commit
daa2a036f8

+ 30 - 0
backend/open_webui/config.py

@@ -2229,6 +2229,18 @@ DOCLING_SERVER_URL = PersistentConfig(
     os.getenv("DOCLING_SERVER_URL", "http://docling:5001"),
 )
 
+DOCLING_DO_OCR = PersistentConfig(
+    "DOCLING_DO_OCR",
+    "rag.docling_do_ocr",
+    os.getenv("DOCLING_DO_OCR", "True").lower() == "true",
+)
+
+DOCLING_FORCE_OCR = PersistentConfig(
+    "DOCLING_FORCE_OCR",
+    "rag.docling_force_ocr",
+    os.getenv("DOCLING_FORCE_OCR", "False").lower() == "true",
+)
+
 DOCLING_OCR_ENGINE = PersistentConfig(
     "DOCLING_OCR_ENGINE",
     "rag.docling_ocr_engine",
@@ -2241,6 +2253,24 @@ DOCLING_OCR_LANG = PersistentConfig(
     os.getenv("DOCLING_OCR_LANG", "eng,fra,deu,spa"),
 )
 
+DOCLING_PDF_BACKEND = PersistentConfig(
+    "DOCLING_PDF_BACKEND",
+    "rag.docling_pdf_backend",
+    os.getenv("DOCLING_PDF_BACKEND", "dlparse_v4"),
+)
+
+DOCLING_TABLE_MODE = PersistentConfig(
+    "DOCLING_TABLE_MODE",
+    "rag.docling_table_mode",
+    os.getenv("DOCLING_TABLE_MODE", "accurate"),
+)
+
+DOCLING_PIPELINE = PersistentConfig(
+    "DOCLING_PIPELINE",
+    "rag.docling_pipeline",
+    os.getenv("DOCLING_PIPELINE", "standard"),
+)
+
 DOCLING_DO_PICTURE_DESCRIPTION = PersistentConfig(
     "DOCLING_DO_PICTURE_DESCRIPTION",
     "rag.docling_do_picture_description",

+ 10 - 0
backend/open_webui/main.py

@@ -243,8 +243,13 @@ from open_webui.config import (
     EXTERNAL_DOCUMENT_LOADER_API_KEY,
     TIKA_SERVER_URL,
     DOCLING_SERVER_URL,
+    DOCLING_DO_OCR,
+    DOCLING_FORCE_OCR,
     DOCLING_OCR_ENGINE,
     DOCLING_OCR_LANG,
+    DOCLING_PDF_BACKEND,
+    DOCLING_TABLE_MODE,
+    DOCLING_PIPELINE,
     DOCLING_DO_PICTURE_DESCRIPTION,
     DOCLING_PICTURE_DESCRIPTION_MODE,
     DOCLING_PICTURE_DESCRIPTION_LOCAL,
@@ -810,8 +815,13 @@ app.state.config.EXTERNAL_DOCUMENT_LOADER_URL = EXTERNAL_DOCUMENT_LOADER_URL
 app.state.config.EXTERNAL_DOCUMENT_LOADER_API_KEY = EXTERNAL_DOCUMENT_LOADER_API_KEY
 app.state.config.TIKA_SERVER_URL = TIKA_SERVER_URL
 app.state.config.DOCLING_SERVER_URL = DOCLING_SERVER_URL
+app.state.config.DOCLING_DO_OCR = DOCLING_DO_OCR
+app.state.config.DOCLING_FORCE_OCR = DOCLING_FORCE_OCR
 app.state.config.DOCLING_OCR_ENGINE = DOCLING_OCR_ENGINE
 app.state.config.DOCLING_OCR_LANG = DOCLING_OCR_LANG
+app.state.config.DOCLING_PDF_BACKEND = DOCLING_PDF_BACKEND
+app.state.config.DOCLING_TABLE_MODE = DOCLING_TABLE_MODE
+app.state.config.DOCLING_PIPELINE = DOCLING_PIPELINE
 app.state.config.DOCLING_DO_PICTURE_DESCRIPTION = DOCLING_DO_PICTURE_DESCRIPTION
 app.state.config.DOCLING_PICTURE_DESCRIPTION_MODE = DOCLING_PICTURE_DESCRIPTION_MODE
 app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL = DOCLING_PICTURE_DESCRIPTION_LOCAL

+ 16 - 2
backend/open_webui/retrieval/loaders/main.py

@@ -148,7 +148,7 @@ class DoclingLoader:
                 )
             }
 
-            params = {"image_export_mode": "placeholder", "table_mode": "accurate"}
+            params = {"image_export_mode": "placeholder"}
 
             if self.params:
                 if self.params.get("do_picture_description"):
@@ -174,7 +174,11 @@ class DoclingLoader:
                             self.params.get("picture_description_api", {})
                         )
 
-                if self.params.get("ocr_engine") and self.params.get("ocr_lang"):
+                params["do_ocr"] = self.params.get("do_ocr")
+
+                params["force_ocr"] = self.params.get("force_ocr")
+
+                if self.params.get("do_ocr") and self.params.get("ocr_engine") and self.params.get("ocr_lang"):
                     params["ocr_engine"] = self.params.get("ocr_engine")
                     params["ocr_lang"] = [
                         lang.strip()
@@ -182,6 +186,16 @@ class DoclingLoader:
                         if lang.strip()
                     ]
 
+                if self.params.get("pdf_backend"):
+                    params["pdf_backend"] = self.params.get("pdf_backend")
+
+                if self.params.get("table_mode"):
+                    params["table_mode"] = self.params.get("table_mode")
+
+                if self.params.get("pipeline"):
+                    params["pipeline"] = self.params.get("pipeline")
+
+
             endpoint = f"{self.url}/v1/convert/file"
             r = requests.post(endpoint, files=files, data=params)
 

+ 45 - 1
backend/open_webui/routers/retrieval.py

@@ -426,8 +426,13 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
         "EXTERNAL_DOCUMENT_LOADER_API_KEY": request.app.state.config.EXTERNAL_DOCUMENT_LOADER_API_KEY,
         "TIKA_SERVER_URL": request.app.state.config.TIKA_SERVER_URL,
         "DOCLING_SERVER_URL": request.app.state.config.DOCLING_SERVER_URL,
+        "DOCLING_DO_OCR": request.app.state.config.DOCLING_DO_OCR,
+        "DOCLING_FORCE_OCR": request.app.state.config.DOCLING_FORCE_OCR,
         "DOCLING_OCR_ENGINE": request.app.state.config.DOCLING_OCR_ENGINE,
         "DOCLING_OCR_LANG": request.app.state.config.DOCLING_OCR_LANG,
+        "DOCLING_PDF_BACKEND": request.app.state.config.DOCLING_PDF_BACKEND,
+        "DOCLING_TABLE_MODE": request.app.state.config.DOCLING_TABLE_MODE,
+        "DOCLING_PIPELINE": request.app.state.config.DOCLING_PIPELINE,
         "DOCLING_DO_PICTURE_DESCRIPTION": request.app.state.config.DOCLING_DO_PICTURE_DESCRIPTION,
         "DOCLING_PICTURE_DESCRIPTION_MODE": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_MODE,
         "DOCLING_PICTURE_DESCRIPTION_LOCAL": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL,
@@ -596,8 +601,13 @@ class ConfigForm(BaseModel):
 
     TIKA_SERVER_URL: Optional[str] = None
     DOCLING_SERVER_URL: Optional[str] = None
+    DOCLING_DO_OCR: Optional[bool] = None
+    DOCLING_FORCE_OCR: Optional[bool] = None
     DOCLING_OCR_ENGINE: Optional[str] = None
     DOCLING_OCR_LANG: Optional[str] = None
+    DOCLING_PDF_BACKEND: Optional[str] = None
+    DOCLING_TABLE_MODE: Optional[str] = None
+    DOCLING_PIPELINE: Optional[str] = None
     DOCLING_DO_PICTURE_DESCRIPTION: Optional[bool] = None
     DOCLING_PICTURE_DESCRIPTION_MODE: Optional[str] = None
     DOCLING_PICTURE_DESCRIPTION_LOCAL: Optional[dict] = None
@@ -767,6 +777,16 @@ async def update_rag_config(
         if form_data.DOCLING_SERVER_URL is not None
         else request.app.state.config.DOCLING_SERVER_URL
     )
+    request.app.state.config.DOCLING_DO_OCR = (
+        form_data.DOCLING_DO_OCR
+        if form_data.DOCLING_DO_OCR is not None
+        else request.app.state.config.DOCLING_DO_OCR
+    )
+    request.app.state.config.DOCLING_FORCE_OCR = (
+        form_data.DOCLING_FORCE_OCR
+        if form_data.DOCLING_FORCE_OCR is not None
+        else request.app.state.config.DOCLING_FORCE_OCR
+    )
     request.app.state.config.DOCLING_OCR_ENGINE = (
         form_data.DOCLING_OCR_ENGINE
         if form_data.DOCLING_OCR_ENGINE is not None
@@ -777,7 +797,21 @@ async def update_rag_config(
         if form_data.DOCLING_OCR_LANG is not None
         else request.app.state.config.DOCLING_OCR_LANG
     )
-
+    request.app.state.config.DOCLING_PDF_BACKEND = (
+        form_data.DOCLING_PDF_BACKEND
+        if form_data.DOCLING_PDF_BACKEND is not None
+        else request.app.state.config.DOCLING_PDF_BACKEND
+    )
+    request.app.state.config.DOCLING_TABLE_MODE = (
+        form_data.DOCLING_TABLE_MODE
+        if form_data.DOCLING_TABLE_MODE is not None
+        else request.app.state.config.DOCLING_TABLE_MODE
+    )
+    request.app.state.config.DOCLING_PIPELINE = (
+        form_data.DOCLING_PIPELINE
+        if form_data.DOCLING_PIPELINE is not None
+        else request.app.state.config.DOCLING_PIPELINE
+    )
     request.app.state.config.DOCLING_DO_PICTURE_DESCRIPTION = (
         form_data.DOCLING_DO_PICTURE_DESCRIPTION
         if form_data.DOCLING_DO_PICTURE_DESCRIPTION is not None
@@ -1062,8 +1096,13 @@ async def update_rag_config(
         "EXTERNAL_DOCUMENT_LOADER_API_KEY": request.app.state.config.EXTERNAL_DOCUMENT_LOADER_API_KEY,
         "TIKA_SERVER_URL": request.app.state.config.TIKA_SERVER_URL,
         "DOCLING_SERVER_URL": request.app.state.config.DOCLING_SERVER_URL,
+        "DOCLING_DO_OCR": request.app.state.config.DOCLING_DO_OCR,
+        "DOCLING_FORCE_OCR": request.app.state.config.DOCLING_FORCE_OCR,
         "DOCLING_OCR_ENGINE": request.app.state.config.DOCLING_OCR_ENGINE,
         "DOCLING_OCR_LANG": request.app.state.config.DOCLING_OCR_LANG,
+        "DOCLING_PDF_BACKEND": request.app.state.config.DOCLING_PDF_BACKEND,
+        "DOCLING_TABLE_MODE": request.app.state.config.DOCLING_TABLE_MODE,
+        "DOCLING_PIPELINE": request.app.state.config.DOCLING_PIPELINE,
         "DOCLING_DO_PICTURE_DESCRIPTION": request.app.state.config.DOCLING_DO_PICTURE_DESCRIPTION,
         "DOCLING_PICTURE_DESCRIPTION_MODE": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_MODE,
         "DOCLING_PICTURE_DESCRIPTION_LOCAL": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL,
@@ -1453,8 +1492,13 @@ def process_file(
                     TIKA_SERVER_URL=request.app.state.config.TIKA_SERVER_URL,
                     DOCLING_SERVER_URL=request.app.state.config.DOCLING_SERVER_URL,
                     DOCLING_PARAMS={
+                        "do_ocr": request.app.state.config.DOCLING_DO_OCR,
+                        "force_ocr": request.app.state.config.DOCLING_FORCE_OCR,
                         "ocr_engine": request.app.state.config.DOCLING_OCR_ENGINE,
                         "ocr_lang": request.app.state.config.DOCLING_OCR_LANG,
+                        "pdf_backend": request.app.state.config.DOCLING_PDF_BACKEND,
+                        "table_mode": request.app.state.config.DOCLING_TABLE_MODE,
+                        "pipeline": request.app.state.config.DOCLING_PIPELINE,
                         "do_picture_description": request.app.state.config.DOCLING_DO_PICTURE_DESCRIPTION,
                         "picture_description_mode": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_MODE,
                         "picture_description_local": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL,

+ 96 - 13
src/lib/components/admin/Settings/Documents.svelte

@@ -152,7 +152,8 @@
 			return;
 		}
 		if (
-			RAGConfig.CONTENT_EXTRACTION_ENGINE === 'docling' &&
+                        RAGConfig.CONTENT_EXTRACTION_ENGINE === 'docling' &&
+                        RAGConfig.DOCLING_DO_OCR &&
 			((RAGConfig.DOCLING_OCR_ENGINE === '' && RAGConfig.DOCLING_OCR_LANG !== '') ||
 				(RAGConfig.DOCLING_OCR_ENGINE !== '' && RAGConfig.DOCLING_OCR_LANG === ''))
 		) {
@@ -161,6 +162,16 @@
 			);
 			return;
 		}
+		if (
+			RAGConfig.CONTENT_EXTRACTION_ENGINE === 'docling' &&
+		        RAGConfig.DOCLING_DO_OCR === false &&
+                        RAGConfig.DOCLING_FORCE_OCR === true
+		) {
+			toast.error(
+			  $i18n.t('In order to force OCR, performing OCR must be enabled.')
+			);
+			return;
+		}
 
 		if (
 			RAGConfig.CONTENT_EXTRACTION_ENGINE === 'datalab_marker' &&
@@ -544,21 +555,93 @@
 									placeholder={$i18n.t('Enter Docling Server URL')}
 									bind:value={RAGConfig.DOCLING_SERVER_URL}
 								/>
-							</div>
+                                                        </div>
+
 							<div class="flex w-full mt-2">
-								<input
-									class="flex-1 w-full text-sm bg-transparent outline-hidden"
-									placeholder={$i18n.t('Enter Docling OCR Engine')}
-									bind:value={RAGConfig.DOCLING_OCR_ENGINE}
-								/>
-								<input
-									class="flex-1 w-full text-sm bg-transparent outline-hidden"
-									placeholder={$i18n.t('Enter Docling OCR Language(s)')}
-									bind:value={RAGConfig.DOCLING_OCR_LANG}
-								/>
+								<div class="flex-1 flex justify-between">
+									<div class=" self-center text-xs font-medium">
+										{$i18n.t('Perform OCR')}
+									</div>
+									<div class="flex items-center relative">
+										<Switch bind:state={RAGConfig.DOCLING_DO_OCR} />
+									</div>
+								</div>
 							</div>
-
+							{#if RAGConfig.DOCLING_DO_OCR}
+                                                                <div class="flex w-full mt-2">
+                                                                        <input
+                                                                                class="flex-1 w-full text-sm bg-transparent outline-hidden"
+                                                                                placeholder={$i18n.t('Enter Docling OCR Engine')}
+                                                                                bind:value={RAGConfig.DOCLING_OCR_ENGINE}
+                                                                        />
+                                                                        <input
+                                                                                class="flex-1 w-full text-sm bg-transparent outline-hidden"
+                                                                                placeholder={$i18n.t('Enter Docling OCR Language(s)')}
+                                                                                bind:value={RAGConfig.DOCLING_OCR_LANG}
+                                                                        />
+                                                                </div>
+                                                        {/if}
 							<div class="flex w-full mt-2">
+								<div class="flex-1 flex justify-between">
+									<div class=" self-center text-xs font-medium">
+										{$i18n.t('Force OCR')}
+									</div>
+									<div class="flex items-center relative">
+										<Switch bind:state={RAGConfig.DOCLING_FORCE_OCR} />
+									</div>
+								</div>
+							</div>
+                                                        <div class="flex justify-between w-full mt-2">
+                                                                <div class="self-center text-xs font-medium">
+                                                                        <Tooltip content={''} placement="top-start">
+                                                                                {$i18n.t('PDF Backend')}
+                                                                        </Tooltip>
+                                                                </div>
+                                                                <div class="">
+                                                                        <select
+                                                                                class="dark:bg-gray-900 w-fit pr-8 rounded-sm px-2 text-xs bg-transparent outline-hidden text-right"
+                                                                                bind:value={RAGConfig.DOCLING_PDF_BACKEND}
+                                                                        >
+                                                                                <option value="pypdfium2">{$i18n.t('pypdfium2')}</option>
+                                                                                <option value="dlparse_v1">{$i18n.t('dlparse_v1')}</option>
+                                                                                <option value="dlparse_v2">{$i18n.t('dlparse_v2')}</option>
+                                                                                <option value="dlparse_v4">{$i18n.t('dlparse_v4')}</option>
+                                                                        </select>
+                                                                </div>
+                                                        </div>
+                                                        <div class="flex justify-between w-full mt-2">
+                                                                <div class="self-center text-xs font-medium">
+                                                                        <Tooltip content={''} placement="top-start">
+                                                                                {$i18n.t('Table Mode')}
+                                                                        </Tooltip>
+                                                                </div>
+                                                                <div class="">
+                                                                        <select
+                                                                                class="dark:bg-gray-900 w-fit pr-8 rounded-sm px-2 text-xs bg-transparent outline-hidden text-right"
+                                                                                bind:value={RAGConfig.DOCLING_TABLE_MODE}
+                                                                        >
+                                                                                <option value="fast">{$i18n.t('fast')}</option>
+                                                                                <option value="accurate">{$i18n.t('accurate')}</option>
+                                                                        </select>
+                                                                </div>
+                                                        </div>
+                                                        <div class="flex justify-between w-full mt-2">
+                                                                <div class="self-center text-xs font-medium">
+                                                                        <Tooltip content={''} placement="top-start">
+                                                                                {$i18n.t('Pipeline')}
+                                                                        </Tooltip>
+                                                                </div>
+                                                                <div class="">
+                                                                        <select
+                                                                                class="dark:bg-gray-900 w-fit pr-8 rounded-sm px-2 text-xs bg-transparent outline-hidden text-right"
+                                                                                bind:value={RAGConfig.DOCLING_PIPELINE}
+                                                                        >
+                                                                                <option value="standard">{$i18n.t('standard')}</option>
+                                                                                <option value="vlm">{$i18n.t('vlm')}</option>
+                                                                        </select>
+                                                                </div>
+                                                        </div>
+                                                        <div class="flex w-full mt-2">
 								<div class="flex-1 flex justify-between">
 									<div class=" self-center text-xs font-medium">
 										{$i18n.t('Describe Pictures in Documents')}