1
0
Эх сурвалжийг харах

feat: add OCR functionality to the PDF loader

Jannik Streidl 1 жил өмнө
parent
commit
089a63e0c6

+ 1 - 1
backend/apps/rag/main.py

@@ -419,7 +419,7 @@ def get_loader(filename: str, file_content_type: str, file_path: str):
     ]
 
     if file_ext == "pdf":
-        loader = PyPDFLoader(file_path)
+        loader = PyPDFLoader(file_path, extract_images=True)
     elif file_ext == "csv":
         loader = CSVLoader(file_path)
     elif file_ext == "rst":

+ 1 - 0
backend/requirements.txt

@@ -33,6 +33,7 @@ pandas
 openpyxl
 pyxlsb
 xlrd
+rapidocr-onnxruntime
 
 faster-whisper