Browse Source

Merge pull request #16185 from hiwylee/vector-search-branch

feat: add Oracle 23ai Vector Search as a newly supported vector DB
Tim Jaeryang Baek 2 months ago
parent
commit
70eb83b701

+ 2 - 0
.gitignore

@@ -1,3 +1,5 @@
+x.py
+yarn.lock
 .DS_Store
 node_modules
 /build

+ 35 - 0
backend/open_webui/config.py

@@ -1994,6 +1994,41 @@ PINECONE_DIMENSION = int(os.getenv("PINECONE_DIMENSION", 1536))  # or 3072, 1024
 PINECONE_METRIC = os.getenv("PINECONE_METRIC", "cosine")
 PINECONE_CLOUD = os.getenv("PINECONE_CLOUD", "aws")  # or "gcp" or "azure"
 
+# ORACLE23AI (Oracle23ai Vector Search)
+
+ORACLE_DB_USE_WALLET = os.environ.get("ORACLE_DB_USE_WALLET", "false").lower() == "true"
+ORACLE_DB_USER = os.environ.get("ORACLE_DB_USER", None)
+ORACLE_DB_PASSWORD = os.environ.get("ORACLE_DB_PASSWORD", None)
+ORACLE_DB_DSN = os.environ.get("ORACLE_DB_DSN", None)
+ORACLE_WALLET_DIR = os.environ.get("ORACLE_WALLET_DIR", None)
+ORACLE_WALLET_PASSWORD = os.environ.get("ORACLE_WALLET_PASSWORD", None)
+ORACLE_VECTOR_LENGTH = int(os.environ.get("ORACLE_VECTOR_LENGTH", 768))
+
+ORACLE_DB_POOL_MIN = int(os.environ.get("ORACLE_DB_POOL_MIN", 2))
+ORACLE_DB_POOL_MAX = int(os.environ.get("ORACLE_DB_POOL_MAX", 10))
+ORACLE_DB_POOL_INCREMENT = int(os.environ.get("ORACLE_DB_POOL_INCREMENT", 1))
+
+log.info(f"VECTOR_DB: {VECTOR_DB}")
+log.info(f"ORACLE_DB_USE_WALLET: {ORACLE_DB_USE_WALLET}")
+log.info(f"ORACLE_DB_USER: {ORACLE_DB_USER}")
+log.info(f"ORACLE_DB_PASSWORD set: {bool(ORACLE_DB_PASSWORD)}")
+log.info(f"ORACLE_DB_DSN: {ORACLE_DB_DSN}")
+log.info(f"ORACLE_WALLET_DIR: {ORACLE_WALLET_DIR}")
+log.info(f"ORACLE_WALLET_PASSWORD set: {bool(ORACLE_WALLET_PASSWORD)}")
+log.info(f"ORACLE_VECTOR_LENGTH: {ORACLE_VECTOR_LENGTH}")
+log.info(f"ORACLE_DB_POOL_MIN: {ORACLE_DB_POOL_MIN}")
+log.info(f"ORACLE_DB_POOL_MAX: {ORACLE_DB_POOL_MAX}")
+log.info(f"ORACLE_DB_POOL_INCREMENT: {ORACLE_DB_POOL_INCREMENT}")
+
+if VECTOR_DB == "oracle23ai" and (not ORACLE_DB_USER or not ORACLE_DB_PASSWORD or not ORACLE_DB_DSN):
+    raise ValueError(
+        "Oracle23ai requires setting ORACLE_DB_USER, ORACLE_DB_PASSWORD, and ORACLE_DB_DSN."
+    )
+if VECTOR_DB == "oracle23ai" and ORACLE_DB_USE_WALLET and (not ORACLE_WALLET_DIR or not ORACLE_WALLET_PASSWORD):
+    raise ValueError(
+        "Oracle23ai requires setting ORACLE_WALLET_DIR and ORACLE_WALLET_PASSWORD when using wallet authentication."
+    )
+
 # S3 Vector
 S3_VECTOR_BUCKET_NAME = os.environ.get("S3_VECTOR_BUCKET_NAME", None)
 S3_VECTOR_REGION = os.environ.get("S3_VECTOR_REGION", None)

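For clarity, a minimal sketch (hypothetical helper, not part of this diff) of the grouping the new guard above relies on: the credential check should only fire when VECTOR_DB is oracle23ai, which is why the three "not ..." tests are parenthesized together.

    def requires_oracle_basic_config(vector_db, user, password, dsn) -> bool:
        # Only flag missing credentials when the Oracle backend is actually selected.
        return vector_db == "oracle23ai" and (not user or not password or not dsn)

    assert requires_oracle_basic_config("oracle23ai", None, None, None)
    assert not requires_oracle_basic_config("chroma", None, None, None)  # other backends are unaffected
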
+ 888 - 0
backend/open_webui/retrieval/vector/dbs/oracle23ai.py

@@ -0,0 +1,888 @@
+"""
+Oracle 23ai Vector Database Client
+
+# .env
+VECTOR_DB = "oracle23ai"
+
+## DBCS or Oracle 23ai Free
+ORACLE_DB_USE_WALLET = false
+ORACLE_DB_USER = "DEMOUSER"
+ORACLE_DB_PASSWORD = "Welcome123456"
+ORACLE_DB_DSN = "localhost:1521/FREEPDB1"
+
+## ADW or ATP
+# ORACLE_DB_USE_WALLET = true
+# ORACLE_DB_USER = "DEMOUSER"
+# ORACLE_DB_PASSWORD = "Welcome123456"
+# ORACLE_DB_DSN = "medium" 
+# ORACLE_DB_DSN = "(description=  (retry_count=3)(retry_delay=3)(address=(protocol=tcps)(port=1522)(host=xx.oraclecloud.com))(connect_data=(service_name=yy.adb.oraclecloud.com))(security=(ssl_server_dn_match=no)))"
+# ORACLE_WALLET_DIR = "/home/opc/adb_wallet"
+# ORACLE_WALLET_PASSWORD = "Welcome1"
+
+ORACLE_VECTOR_LENGTH = 768
+
+ORACLE_DB_POOL_MIN = 2
+ORACLE_DB_POOL_MAX = 10
+ORACLE_DB_POOL_INCREMENT = 1
+"""
+
+from typing import Optional, List, Dict, Any, Union
+from decimal import Decimal
+import logging
+import os
+import threading
+import time
+import json
+import array
+import oracledb
+
+from open_webui.retrieval.vector.main import (
+    VectorDBBase,
+    VectorItem,
+    SearchResult,
+    GetResult,
+)
+
+from open_webui.config import (
+    ORACLE_DB_USE_WALLET,
+    ORACLE_DB_USER,
+    ORACLE_DB_PASSWORD,
+    ORACLE_DB_DSN,
+    ORACLE_WALLET_DIR,
+    ORACLE_WALLET_PASSWORD,
+    ORACLE_VECTOR_LENGTH,
+    ORACLE_DB_POOL_MIN,
+    ORACLE_DB_POOL_MAX,
+    ORACLE_DB_POOL_INCREMENT,
+)
+from open_webui.env import SRC_LOG_LEVELS
+
+log = logging.getLogger(__name__)
+log.setLevel(SRC_LOG_LEVELS["RAG"])
+
+
+class Oracle23aiClient(VectorDBBase):
+    """
+    Oracle Vector Database Client for vector similarity search using Oracle Database 23ai.
+    
+    This client provides an interface to store, retrieve, and search vector embeddings
+    in an Oracle database. It uses connection pooling for efficient database access
+    and supports vector similarity search operations.
+    
+    Attributes:
+        pool: Connection pool for Oracle database connections
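+
+    Example (illustrative):
+        >>> client = Oracle23aiClient()
+        >>> client.has_collection("my_collection")
+        >>> client.close()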
+    """
+    
+    def __init__(self) -> None:
+        """
+        Initialize the Oracle23aiClient with a connection pool.
+        
+        Creates a connection pool with configurable min/max connections, initializes
+        the database schema if needed, and sets up necessary tables and indexes.
+        
+        Raises:
+            ValueError: If required configuration parameters are missing
+            Exception: If database initialization fails
+        """
+        self.pool = None
+        
+        try:
+            # Create the appropriate connection pool based on DB type
+            if ORACLE_DB_USE_WALLET:
+                self._create_adb_pool()
+            else:  # DBCS
+                self._create_dbcs_pool()
+            
+            dsn = ORACLE_DB_DSN 
+            log.info(f"Creating Connection Pool [{ORACLE_DB_USER}:**@{dsn}]")
+            
+            with self.get_connection() as connection:
+                log.info(f"Connection version: {connection.version}")
+                self._initialize_database(connection)
+                
+            log.info("Oracle Vector Search initialization complete.")
+        except Exception as e:
+            log.exception(f"Error during Oracle Vector Search initialization: {e}")
+            raise
+    
+    def _create_adb_pool(self) -> None:
+        """
+        Create connection pool for Oracle Autonomous Database.
+        
+        Uses wallet-based authentication.
+        """
+        self.pool = oracledb.create_pool(
+            user=ORACLE_DB_USER,
+            password=ORACLE_DB_PASSWORD,
+            dsn=ORACLE_DB_DSN,
+            min=ORACLE_DB_POOL_MIN,
+            max=ORACLE_DB_POOL_MAX,
+            increment=ORACLE_DB_POOL_INCREMENT,
+            config_dir=ORACLE_WALLET_DIR,
+            wallet_location=ORACLE_WALLET_DIR,
+            wallet_password=ORACLE_WALLET_PASSWORD
+        )
+        log.info("Created ADB connection pool with wallet authentication.")
+    
+    def _create_dbcs_pool(self) -> None:
+        """
+        Create connection pool for Oracle Database Cloud Service.
+        
+        Uses basic authentication without wallet.
+        """
+        self.pool = oracledb.create_pool(
+            user=ORACLE_DB_USER,
+            password=ORACLE_DB_PASSWORD,
+            dsn=ORACLE_DB_DSN,
+            min=ORACLE_DB_POOL_MIN,
+            max=ORACLE_DB_POOL_MAX,
+            increment=ORACLE_DB_POOL_INCREMENT
+        )
+        log.info("Created DB connection pool with basic authentication.")
+    
+    def get_connection(self):
+        """
+        Acquire a connection from the connection pool with retry logic.
+
+        Returns:
+            connection: A database connection with output type handler configured
+        """
+        max_retries = 3
+        for attempt in range(max_retries):
+            try:
+                connection = self.pool.acquire()
+                connection.outputtypehandler = self._output_type_handler
+                return connection
+            except oracledb.DatabaseError as e:
+                error_obj, = e.args
+                log.exception(f"Connection attempt {attempt + 1} failed: {error_obj.message}")
+
+                if attempt < max_retries - 1:
+                    wait_time = 2 ** attempt
+                    log.info(f"Retrying in {wait_time} seconds...")
+                    time.sleep(wait_time)
+                else:
+                    raise    
+
+    def start_health_monitor(self, interval_seconds: int = 60):
+        """
+        Start a background thread to periodically check the health of the connection pool.
+
+        Args:
+            interval_seconds (int): Number of seconds between health checks
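+
+        Example (illustrative):
+            >>> client = Oracle23aiClient()
+            >>> client.start_health_monitor(interval_seconds=120)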
+        """
+        def _monitor():
+            while True:
+                try:
+                    log.info("[HealthCheck] Running periodic DB health check...")
+                    self.ensure_connection()
+                    log.info("[HealthCheck] Connection is healthy.")
+                except Exception as e:
+                    log.exception(f"[HealthCheck] Connection health check failed: {e}")
+                time.sleep(interval_seconds)
+
+        thread = threading.Thread(target=_monitor, daemon=True)
+        thread.start()
+        log.info(f"Started DB health monitor every {interval_seconds} seconds.")
+
+    def _reconnect_pool(self):
+        """
+        Attempt to reinitialize the connection pool if it's been closed or broken.
+        """
+        try:
+            log.info("Attempting to reinitialize the Oracle connection pool...")
+            
+            # Close existing pool if it exists
+            if self.pool:
+                try:
+                    self.pool.close()
+                except Exception as close_error:
+                    log.warning(f"Error closing existing pool: {close_error}")
+            
+            # Re-create the appropriate connection pool based on DB type
+            if ORACLE_DB_USE_WALLET:
+                self._create_adb_pool()
+            else:  # DBCS
+                self._create_dbcs_pool()
+                
+            log.info("Connection pool reinitialized.")
+        except Exception as e:
+            log.exception(f"Failed to reinitialize the connection pool: {e}")
+            raise
+
+    def ensure_connection(self):
+        """
+        Ensure the database connection is alive, reconnecting pool if needed.
+        """
+        try:
+            with self.get_connection() as connection:
+                with connection.cursor() as cursor:
+                    cursor.execute("SELECT 1 FROM dual")
+        except Exception as e:
+            log.exception(f"Connection check failed: {e}, attempting to reconnect pool...")
+            self._reconnect_pool()
+
+    def _output_type_handler(self, cursor, metadata):
+        """
+        Handle Oracle vector type conversion.
+        
+        Args:
+            cursor: Oracle database cursor
+            metadata: Metadata for the column
+            
+        Returns:
+            A variable with appropriate conversion for vector types
+        """
+        if metadata.type_code is oracledb.DB_TYPE_VECTOR:
+            return cursor.var(metadata.type_code, arraysize=cursor.arraysize,
+                            outconverter=list)
+
+    def _initialize_database(self, connection) -> None:
+        """
+        Initialize database schema, tables and indexes.
+        
+        Creates the document_chunk table and necessary indexes if they don't exist.
+        
+        Args:
+            connection: Oracle database connection
+            
+        Raises:
+            Exception: If schema initialization fails
+        """
+        with connection.cursor() as cursor:
+            try:
+                log.info("Creating Table document_chunk")
+                cursor.execute("""
+                    BEGIN
+                        EXECUTE IMMEDIATE '
+                            CREATE TABLE IF NOT EXISTS document_chunk (
+                                id VARCHAR2(255) PRIMARY KEY,
+                                collection_name VARCHAR2(255) NOT NULL,
+                                text CLOB,
+                                vmetadata JSON,
+                                vector vector(*, float32)
+                            )
+                        ';
+                    EXCEPTION
+                        WHEN OTHERS THEN
+                            IF SQLCODE != -955 THEN
+                                RAISE;
+                            END IF;
+                    END;
+                """)
+                
+                log.info("Creating Index document_chunk_collection_name_idx")
+                cursor.execute("""
+                    BEGIN
+                        EXECUTE IMMEDIATE '
+                            CREATE INDEX IF NOT EXISTS document_chunk_collection_name_idx
+                            ON document_chunk (collection_name)
+                        ';
+                    EXCEPTION
+                        WHEN OTHERS THEN
+                            IF SQLCODE != -955 THEN
+                                RAISE;
+                            END IF;
+                    END;
+                """)
+                
+                log.info("Creating VECTOR INDEX document_chunk_vector_ivf_idx")
+                cursor.execute("""
+                    BEGIN
+                        EXECUTE IMMEDIATE '
+                            CREATE VECTOR INDEX IF NOT EXISTS document_chunk_vector_ivf_idx 
+                            ON document_chunk(vector) 
+                            ORGANIZATION NEIGHBOR PARTITIONS
+                            DISTANCE COSINE
+                            WITH TARGET ACCURACY 95
+                            PARAMETERS (TYPE IVF, NEIGHBOR PARTITIONS 100)
+                        ';
+                    EXCEPTION
+                        WHEN OTHERS THEN
+                            IF SQLCODE != -955 THEN
+                                RAISE;
+                            END IF;
+                    END;
+                """)
+                
+                connection.commit()
+                log.info("Database initialization completed successfully.")
+                
+            except Exception as e:
+                connection.rollback()
+                log.exception(f"Error during database initialization: {e}")
+                raise
+
+    def check_vector_length(self) -> None:
+        """
+        Check vector length compatibility (placeholder).
+        
+        This method would check if the configured vector length matches the database schema.
+        Currently implemented as a placeholder.
+        """
+        pass
+
+    def _vector_to_blob(self, vector: List[float]) -> array.array:
+        """
+        Convert a vector to a float32 array suitable for binding to an Oracle VECTOR column.
+
+        Args:
+            vector (List[float]): The vector to convert
+
+        Returns:
+            array.array: The vector as a 32-bit float array accepted by python-oracledb
+        """
+        return array.array("f", vector)
+
+    def adjust_vector_length(self, vector: List[float]) -> List[float]:
+        """
+        Adjust vector to the expected length if needed.
+        
+        Args:
+            vector (List[float]): The vector to adjust
+            
+        Returns:
+            List[float]: The adjusted vector
+        """
+        return vector
+    
+    def _decimal_handler(self, obj):
+        """
+        Handle Decimal objects for JSON serialization.
+        
+        Args:
+            obj: Object to serialize
+            
+        Returns:
+            float: Converted decimal value
+            
+        Raises:
+            TypeError: If object is not JSON serializable
+        """
+        if isinstance(obj, Decimal):
+            return float(obj)
+        raise TypeError(f"{obj} is not JSON serializable")
+
+    def _metadata_to_json(self, metadata: Dict) -> str:
+        """
+        Convert metadata dictionary to JSON string.
+        
+        Args:
+            metadata (Dict): Metadata dictionary
+            
+        Returns:
+            str: JSON representation of metadata
+        """
+        return json.dumps(metadata, default=self._decimal_handler) if metadata else "{}"
+
+    def _json_to_metadata(self, json_str: str) -> Dict:
+        """
+        Convert JSON string to metadata dictionary.
+        
+        Args:
+            json_str (str): JSON string
+            
+        Returns:
+            Dict: Metadata dictionary
+        """
+        return json.loads(json_str) if json_str else {}
+
+    def insert(self, collection_name: str, items: List[VectorItem]) -> None:
+        """
+        Insert vector items into the database.
+        
+        Args:
+            collection_name (str): Name of the collection
+            items (List[VectorItem]): List of vector items to insert
+            
+        Raises:
+            Exception: If insertion fails
+            
+        Example:
+            >>> client = Oracle23aiClient()
+            >>> items = [
+            ...     {"id": "1", "text": "Sample text", "vector": [0.1, 0.2, ...], "metadata": {"source": "doc1"}},
+            ...     {"id": "2", "text": "Another text", "vector": [0.3, 0.4, ...], "metadata": {"source": "doc2"}}
+            ... ]
+            >>> client.insert("my_collection", items)
+        """
+        log.info(f"Inserting {len(items)} items into collection '{collection_name}'.")
+        
+        with self.get_connection() as connection:
+            try:
+                with connection.cursor() as cursor:
+                    for item in items:
+                        vector_blob = self._vector_to_blob(item["vector"])
+                        metadata_json = self._metadata_to_json(item["metadata"])
+                        
+                        cursor.execute("""
+                            INSERT INTO document_chunk 
+                            (id, collection_name, text, vmetadata, vector) 
+                            VALUES (:id, :collection_name, :text, :metadata, :vector)
+                        """, {
+                            'id': item["id"],
+                            'collection_name': collection_name,
+                            'text': item["text"],
+                            'metadata': metadata_json,
+                            'vector': vector_blob                   
+                        })
+                
+                connection.commit()
+                log.info(f"Successfully inserted {len(items)} items into collection '{collection_name}'.")
+
+            except Exception as e:
+                connection.rollback()
+                log.exception(f"Error during insert: {e}")
+                raise
+
+    def upsert(self, collection_name: str, items: List[VectorItem]) -> None:
+        """
+        Update or insert vector items into the database.
+        
+        If an item with the same ID exists, it will be updated;
+        otherwise, it will be inserted.
+        
+        Args:
+            collection_name (str): Name of the collection
+            items (List[VectorItem]): List of vector items to upsert
+            
+        Raises:
+            Exception: If upsert operation fails
+
+        Example:
+            >>> client = Oracle23aiClient()
+            >>> items = [
+            ...     {"id": "1", "text": "Updated text", "vector": [0.1, 0.2, ...], "metadata": {"source": "doc1"}},
+            ...     {"id": "3", "text": "New item", "vector": [0.5, 0.6, ...], "metadata": {"source": "doc3"}}
+            ... ]
+            >>> client.upsert("my_collection", items)
+        """
+        log.info(f"Upserting {len(items)} items into collection '{collection_name}'.")
+
+        with self.get_connection() as connection:
+            try:
+                with connection.cursor() as cursor:
+                    for item in items:
+                        vector_blob = self._vector_to_blob(item["vector"])
+                        metadata_json = self._metadata_to_json(item["metadata"])
+                        
+                        cursor.execute("""
+                            MERGE INTO document_chunk d
+                            USING (SELECT :merge_id as id FROM dual) s
+                            ON (d.id = s.id)
+                            WHEN MATCHED THEN
+                                UPDATE SET 
+                                    collection_name = :upd_collection_name,
+                                    text = :upd_text,
+                                    vmetadata = :upd_metadata,
+                                    vector = :upd_vector
+                            WHEN NOT MATCHED THEN
+                                INSERT (id, collection_name, text, vmetadata, vector)
+                                VALUES (:ins_id, :ins_collection_name, :ins_text, :ins_metadata, :ins_vector)
+                        """, {
+                            'merge_id': item["id"],
+                            'upd_collection_name': collection_name,
+                            'upd_text': item["text"],
+                            'upd_metadata': metadata_json,
+                            'upd_vector': vector_blob,                    
+                            'ins_id': item["id"],
+                            'ins_collection_name': collection_name,
+                            'ins_text': item["text"],
+                            'ins_metadata': metadata_json,
+                            'ins_vector': vector_blob
+                        })
+                
+                connection.commit()
+                log.info(f"Successfully upserted {len(items)} items into collection '{collection_name}'.")
+
+            except Exception as e:
+                connection.rollback()
+                log.exception(f"Error during upsert: {e}")
+                raise
+
+    def search(
+        self,
+        collection_name: str,
+        vectors: List[List[Union[float, int]]],
+        limit: int
+    ) -> Optional[SearchResult]:
+        """
+        Search for similar vectors in the database.
+        
+        Performs vector similarity search using cosine distance.
+        
+        Args:
+            collection_name (str): Name of the collection to search
+            vectors (List[List[Union[float, int]]]): Query vectors to find similar items for
+            limit (int): Maximum number of results to return per query
+            
+        Returns:
+            Optional[SearchResult]: Search results containing ids, distances, documents, and metadata
+            
+        Example:
+            >>> client = Oracle23aiClient()
+            >>> query_vector = [0.1, 0.2, 0.3, ...]  # Must match VECTOR_LENGTH
+            >>> results = client.search("my_collection", [query_vector], limit=5)
+            >>> if results:
+            ...     log.info(f"Found {len(results.ids[0])} matches")
+            ...     for i, (id, dist) in enumerate(zip(results.ids[0], results.distances[0])):
+            ...         log.info(f"Match {i+1}: id={id}, distance={dist}")
+        """
+        log.info(f"Searching items from collection '{collection_name}' with limit {limit}.")
+        
+        try:
+            if not vectors:
+                log.warning("No vectors provided for search.")
+                return None
+            
+            num_queries = len(vectors)
+            
+            ids = [[] for _ in range(num_queries)]
+            distances = [[] for _ in range(num_queries)]
+            documents = [[] for _ in range(num_queries)]
+            metadatas = [[] for _ in range(num_queries)]
+            
+            with self.get_connection() as connection:
+                with connection.cursor() as cursor:
+                    for qid, vector in enumerate(vectors):
+                        vector_blob = self._vector_to_blob(vector)
+                        
+                        cursor.execute("""
+                            SELECT dc.id, dc.text, 
+                                JSON_SERIALIZE(dc.vmetadata RETURNING VARCHAR2(4096)) as vmetadata,
+                                VECTOR_DISTANCE(dc.vector, :query_vector, COSINE) as distance
+                            FROM document_chunk dc
+                            WHERE dc.collection_name = :collection_name
+                            ORDER BY VECTOR_DISTANCE(dc.vector, :query_vector, COSINE)
+                            FETCH APPROX FIRST :limit ROWS ONLY
+                        """, {
+                            'query_vector': vector_blob,
+                            'collection_name': collection_name,
+                            'limit': limit
+                        })
+                        
+                        results = cursor.fetchall()
+                        
+                        for row in results:
+                            ids[qid].append(row[0])
+                            documents[qid].append(row[1].read() if isinstance(row[1], oracledb.LOB) else str(row[1]))
+                            # Parse JSON metadata (may arrive as a LOB or a plain string)
+                            metadata_str = row[2].read() if isinstance(row[2], oracledb.LOB) else row[2]
+                            metadatas[qid].append(self._json_to_metadata(metadata_str))
+                            distances[qid].append(float(row[3]))
+            
+            log.info(f"Search completed. Found {sum(len(ids[i]) for i in range(num_queries))} total results.")
+
+            return SearchResult(
+                ids=ids,
+                distances=distances,
+                documents=documents,
+                metadatas=metadatas
+            )
+            
+        except Exception as e:
+            log.exception(f"Error during search: {e}")
+            return None
+
+    def query(
+        self, 
+        collection_name: str, 
+        filter: Dict, 
+        limit: Optional[int] = None
+    ) -> Optional[GetResult]:
+        """
+        Query items based on metadata filters.
+        
+        Retrieves items that match specified metadata criteria.
+        
+        Args:
+            collection_name (str): Name of the collection to query
+            filter (Dict[str, Any]): Metadata filters to apply
+            limit (Optional[int]): Maximum number of results to return
+            
+        Returns:
+            Optional[GetResult]: Query results containing ids, documents, and metadata
+            
+        Example:
+            >>> client = Oracle23aiClient()
+            >>> filter = {"source": "doc1", "category": "finance"}
+            >>> results = client.query("my_collection", filter, limit=20)
+            >>> if results:
+            ...     print(f"Found {len(results.ids[0])} matching documents")
+        """
+        log.info(f"Querying items from collection '{collection_name}' with filters.")
+        
+        try:
+            limit = limit or 100
+            
+            query = """
+                SELECT id, text, JSON_SERIALIZE(vmetadata RETURNING VARCHAR2(4096)) as vmetadata 
+                FROM document_chunk
+                WHERE collection_name = :collection_name
+            """
+            
+            params = {'collection_name': collection_name}
+            
+            for i, (key, value) in enumerate(filter.items()):
+                param_name = f"value_{i}"
+                query += f" AND JSON_VALUE(vmetadata, '$.{key}' RETURNING VARCHAR2(4096)) = :{param_name}"
+                params[param_name] = str(value)
+            
+            query += " FETCH FIRST :limit ROWS ONLY"
+            params['limit'] = limit
+            
+            with self.get_connection() as connection:
+                with connection.cursor() as cursor:
+                    cursor.execute(query, params)
+                    results = cursor.fetchall()
+            
+            if not results:
+                log.info("No results found for query.")
+                return None
+            
+            ids = [[row[0] for row in results]]
+            documents = [[row[1].read() if isinstance(row[1], oracledb.LOB) else str(row[1]) for row in results]]
+            # Parse JSON metadata (may arrive as a LOB or a plain string)
+            metadatas = [[self._json_to_metadata(row[2].read() if isinstance(row[2], oracledb.LOB) else row[2]) for row in results]]
+            
+            log.info(f"Query completed. Found {len(results)} results.")
+            
+            return GetResult(
+                ids=ids,
+                documents=documents,
+                metadatas=metadatas
+            )
+            
+        except Exception as e:
+            log.exception(f"Error during query: {e}")
+            return None
+
+    def get(
+        self,
+        collection_name: str,
+        limit: Optional[int] = None
+    ) -> Optional[GetResult]:
+        """
+        Get all items in a collection.
+        
+        Retrieves items from a specified collection up to the limit.
+        
+        Args:
+            collection_name (str): Name of the collection to retrieve
+            limit (Optional[int]): Maximum number of items to retrieve
+            
+        Returns:
+            Optional[GetResult]: Result containing ids, documents, and metadata
+            
+        Example:
+            >>> client = Oracle23aiClient()
+            >>> results = client.get("my_collection", limit=50)
+            >>> if results:
+            ...     print(f"Retrieved {len(results.ids[0])} documents from collection")
+        """
+        log.info(f"Getting items from collection '{collection_name}' with limit {limit}.")
+        
+        try:
+            limit = limit or 1000
+            
+            with self.get_connection() as connection:
+                with connection.cursor() as cursor:
+                    cursor.execute("""
+                        SELECT /*+ MONITOR */ id, text, JSON_SERIALIZE(vmetadata RETURNING VARCHAR2(4096)) as vmetadata
+                        FROM document_chunk
+                        WHERE collection_name = :collection_name
+                        FETCH FIRST :limit ROWS ONLY
+                    """, {
+                        'collection_name': collection_name,
+                        'limit': limit
+                    })
+                    
+                    results = cursor.fetchall()
+            
+            if not results:
+                log.info("No results found.")
+                return None
+            
+            ids = [[row[0] for row in results]]
+            documents = [[row[1].read() if isinstance(row[1], oracledb.LOB) else str(row[1]) for row in results]]
+            # Parse JSON metadata (may arrive as a LOB or a plain string)
+            metadatas = [[self._json_to_metadata(row[2].read() if isinstance(row[2], oracledb.LOB) else row[2]) for row in results]]
+           
+            return GetResult(
+                ids=ids,
+                documents=documents,
+                metadatas=metadatas
+            )
+
+        except Exception as e:
+            log.exception(f"Error during get: {e}")
+            return None
+
+    def delete(
+        self,
+        collection_name: str,
+        ids: Optional[List[str]] = None,
+        filter: Optional[Dict[str, Any]] = None,
+    ) -> None:
+        """
+        Delete items from the database.
+        
+        Deletes items from a collection based on IDs or metadata filters.
+        
+        Args:
+            collection_name (str): Name of the collection to delete from
+            ids (Optional[List[str]]): Specific item IDs to delete
+            filter (Optional[Dict[str, Any]]): Metadata filters for deletion
+            
+        Raises:
+            Exception: If deletion fails
+            
+        Example:
+            >>> client = Oracle23aiClient()
+            >>> # Delete specific items by ID
+            >>> client.delete("my_collection", ids=["1", "3", "5"])
+            >>> # Or delete by metadata filter
+            >>> client.delete("my_collection", filter={"source": "deprecated_source"})
+        """
+        log.info(f"Deleting items from collection '{collection_name}'.")
+        
+        try:
+            query = "DELETE FROM document_chunk WHERE collection_name = :collection_name"
+            params = {'collection_name': collection_name}
+            
+            if ids:
+                # Bind each id as its own parameter so the query stays fully parameterized
+                placeholders = ','.join([f':id_{i}' for i in range(len(ids))])
+                query += f" AND id IN ({placeholders})"
+                for i, id_val in enumerate(ids):
+                    params[f'id_{i}'] = id_val
+            
+            if filter:
+                for i, (key, value) in enumerate(filter.items()):
+                    param_name = f"value_{i}"
+                    query += f" AND JSON_VALUE(vmetadata, '$.{key}' RETURNING VARCHAR2(4096)) = :{param_name}"
+                    params[param_name] = str(value)
+            
+            with self.get_connection() as connection:
+                with connection.cursor() as cursor:
+                    cursor.execute(query, params)
+                    deleted = cursor.rowcount
+                connection.commit()
+            
+            log.info(f"Deleted {deleted} items from collection '{collection_name}'.")
+            
+        except Exception as e:
+            log.exception(f"Error during delete: {e}")
+            raise
+
+    def reset(self) -> None:
+        """
+        Reset the database by deleting all items.
+        
+        Deletes all items from the document_chunk table.
+        
+        Raises:
+            Exception: If reset fails
+            
+        Example:
+            >>> client = Oracle23aiClient()
+            >>> client.reset()  # Warning: Removes all data!
+        """
+        log.info("Resetting database - deleting all items.")
+        
+        try:
+            with self.get_connection() as connection:
+                with connection.cursor() as cursor:
+                    cursor.execute("DELETE FROM document_chunk")
+                    deleted = cursor.rowcount
+                connection.commit()
+            
+            log.info(f"Reset complete. Deleted {deleted} items from 'document_chunk' table.")
+
+        except Exception as e:
+            log.exception(f"Error during reset: {e}")
+            raise
+
+    def close(self) -> None:
+        """
+        Close the database connection pool.
+        
+        Properly closes the connection pool and releases all resources.
+        
+        Example:
+            >>> client = Oracle23aiClient()
+            >>> # After finishing all operations
+            >>> client.close()
+        """
+        try:
+            if hasattr(self, 'pool') and self.pool:
+                self.pool.close()
+                log.info("Oracle Vector Search connection pool closed.")
+        except Exception as e:
+            log.exception(f"Error closing connection pool: {e}")
+
+    def has_collection(self, collection_name: str) -> bool:
+        """
+        Check if a collection exists.
+        
+        Args:
+            collection_name (str): Name of the collection to check
+            
+        Returns:
+            bool: True if the collection exists, False otherwise
+            
+        Example:
+            >>> client = Oracle23aiClient()
+            >>> if client.has_collection("my_collection"):
+            ...     print("Collection exists!")
+            ... else:
+            ...     print("Collection does not exist.")
+        """
+        try:
+            with self.get_connection() as connection:
+                with connection.cursor() as cursor:
+                    cursor.execute("""
+                        SELECT COUNT(*)
+                        FROM document_chunk
+                        WHERE collection_name = :collection_name
+                        FETCH FIRST 1 ROWS ONLY
+                    """, {'collection_name': collection_name})
+                    
+                    count = cursor.fetchone()[0]
+            
+            return count > 0
+            
+        except Exception as e:
+            log.exception(f"Error checking collection existence: {e}")
+            return False
+
+    def delete_collection(self, collection_name: str) -> None:
+        """
+        Delete an entire collection.
+        
+        Removes all items belonging to the specified collection.
+        
+        Args:
+            collection_name (str): Name of the collection to delete
+            
+        Example:
+            >>> client = Oracle23aiClient()
+            >>> client.delete_collection("obsolete_collection")
+        """
+        log.info(f"Deleting collection '{collection_name}'.")
+        
+        try:
+            with self.get_connection() as connection:
+                with connection.cursor() as cursor:
+                    cursor.execute("""
+                        DELETE FROM document_chunk 
+                        WHERE collection_name = :collection_name
+                    """, {'collection_name': collection_name})
+                    
+                    deleted = cursor.rowcount
+                connection.commit()
+            
+            log.info(f"Collection '{collection_name}' deleted. Removed {deleted} items.")
+            
+        except Exception as e:
+            log.exception(f"Error deleting collection '{collection_name}': {e}")
+            raise

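Taken together, a minimal usage sketch of the client added above (the collection name and the 4-dimensional vectors are illustrative; real embeddings should match the configured embedding length, and the ORACLE_* environment described in the module docstring is assumed to be set):

    from open_webui.retrieval.vector.dbs.oracle23ai import Oracle23aiClient

    client = Oracle23aiClient()  # builds the pool and creates the document_chunk table if needed

    items = [
        {"id": "doc-1", "text": "hello oracle", "vector": [0.1, 0.2, 0.3, 0.4], "metadata": {"source": "demo"}},
    ]
    client.upsert("demo_collection", items)

    results = client.search("demo_collection", [[0.1, 0.2, 0.3, 0.4]], limit=1)
    if results:
        print(results.ids[0], results.distances[0])

    client.delete_collection("demo_collection")
    client.close()
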
+ 4 - 0
backend/open_webui/retrieval/vector/factory.py

@@ -52,6 +52,10 @@ class Vector:
                 from open_webui.retrieval.vector.dbs.chroma import ChromaClient
 
                 return ChromaClient()
+            case VectorType.ORACLE23AI:
+                from open_webui.retrieval.vector.dbs.oracle23ai import Oracle23aiClient
+
+                return Oracle23aiClient()
             case _:
                 raise ValueError(f"Unsupported vector type: {vector_type}")
 

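A sketch of how the new branch is reached at runtime; the Vector.get_vector(...) entry point is an assumption inferred from the match statement above, not confirmed by this diff:

    from open_webui.retrieval.vector.factory import Vector
    from open_webui.retrieval.vector.type import VectorType

    # Assumed accessor name; the diff only shows the match arm that returns Oracle23aiClient().
    client = Vector.get_vector(VectorType.ORACLE23AI)
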
+ 1 - 0
backend/open_webui/retrieval/vector/type.py

@@ -9,4 +9,5 @@ class VectorType(StrEnum):
     ELASTICSEARCH = "elasticsearch"
     OPENSEARCH = "opensearch"
     PGVECTOR = "pgvector"
+    ORACLE23AI = "oracle23ai"
     S3VECTOR = "s3vector"

+ 1 - 0
backend/requirements.txt

@@ -58,6 +58,7 @@ opensearch-py==2.8.0
 playwright==1.49.1 # Caution: version must match docker-compose.playwright.yaml
 elasticsearch==9.0.1
 pinecone==6.0.2
+oracledb==3.2.0
 
 transformers
 sentence-transformers==4.1.0

+ 59 - 39
package-lock.json

@@ -1326,10 +1326,11 @@
 			}
 		},
 		"node_modules/@eslint/eslintrc/node_modules/brace-expansion": {
-			"version": "1.1.11",
-			"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
-			"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
+			"version": "1.1.12",
+			"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
+			"integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
 			"dev": true,
+			"license": "MIT",
 			"dependencies": {
 				"balanced-match": "^1.0.0",
 				"concat-map": "0.0.1"
@@ -1429,10 +1430,11 @@
 			}
 		},
 		"node_modules/@humanwhocodes/config-array/node_modules/brace-expansion": {
-			"version": "1.1.11",
-			"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
-			"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
+			"version": "1.1.12",
+			"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
+			"integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
 			"dev": true,
+			"license": "MIT",
 			"dependencies": {
 				"balanced-match": "^1.0.0",
 				"concat-map": "0.0.1"
@@ -2858,6 +2860,15 @@
 				"svelte": "^3.55.0 || ^4.0.0 || ^5.0.0"
 			}
 		},
+		"node_modules/@sveltejs/acorn-typescript": {
+			"version": "1.0.5",
+			"resolved": "https://registry.npmjs.org/@sveltejs/acorn-typescript/-/acorn-typescript-1.0.5.tgz",
+			"integrity": "sha512-IwQk4yfwLdibDlrXVE04jTZYlLnwsTT2PIOQQGNLWfjavGifnk1JD1LcZjZaBTRcxZu2FfPfNLOE04DSu9lqtQ==",
+			"license": "MIT",
+			"peerDependencies": {
+				"acorn": "^8.9.0"
+			}
+		},
 		"node_modules/@sveltejs/adapter-auto": {
 			"version": "3.2.2",
 			"resolved": "https://registry.npmjs.org/@sveltejs/adapter-auto/-/adapter-auto-3.2.2.tgz",
@@ -2895,16 +2906,17 @@
 			}
 		},
 		"node_modules/@sveltejs/kit": {
-			"version": "2.20.2",
-			"resolved": "https://registry.npmjs.org/@sveltejs/kit/-/kit-2.20.2.tgz",
-			"integrity": "sha512-Dv8TOAZC9vyfcAB9TMsvUEJsRbklRTeNfcYBPaeH6KnABJ99i3CvCB2eNx8fiiliIqe+9GIchBg4RodRH5p1BQ==",
+			"version": "2.22.4",
+			"resolved": "https://registry.npmjs.org/@sveltejs/kit/-/kit-2.22.4.tgz",
+			"integrity": "sha512-BXK9hTbP8AeQIfoz6+P3uoyVYStVHc5CIKqoTSF7hXm3Q5P9BwFMdEus4jsQuhaYmXGHzukcGlxe2QrsE8BJfQ==",
 			"license": "MIT",
 			"dependencies": {
+				"@sveltejs/acorn-typescript": "^1.0.5",
 				"@types/cookie": "^0.6.0",
+				"acorn": "^8.14.1",
 				"cookie": "^0.6.0",
 				"devalue": "^5.1.0",
 				"esm-env": "^1.2.2",
-				"import-meta-resolve": "^4.1.0",
 				"kleur": "^4.1.5",
 				"magic-string": "^0.30.5",
 				"mrmime": "^2.0.0",
@@ -2919,9 +2931,9 @@
 				"node": ">=18.13"
 			},
 			"peerDependencies": {
-				"@sveltejs/vite-plugin-svelte": "^3.0.0 || ^4.0.0-next.1 || ^5.0.0",
+				"@sveltejs/vite-plugin-svelte": "^3.0.0 || ^4.0.0-next.1 || ^5.0.0 || ^6.0.0-next.0",
 				"svelte": "^4.0.0 || ^5.0.0-next.0",
-				"vite": "^5.0.3 || ^6.0.0"
+				"vite": "^5.0.3 || ^6.0.0 || ^7.0.0-beta.0"
 			}
 		},
 		"node_modules/@sveltejs/svelte-virtual-list": {
@@ -4965,9 +4977,10 @@
 			"license": "ISC"
 		},
 		"node_modules/brace-expansion": {
-			"version": "2.0.1",
-			"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
-			"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
+			"version": "2.0.2",
+			"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
+			"integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
+			"license": "MIT",
 			"dependencies": {
 				"balanced-match": "^1.0.0"
 			}
@@ -7094,10 +7107,11 @@
 			}
 		},
 		"node_modules/eslint/node_modules/brace-expansion": {
-			"version": "1.1.11",
-			"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
-			"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
+			"version": "1.1.12",
+			"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
+			"integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
 			"dev": true,
+			"license": "MIT",
 			"dependencies": {
 				"balanced-match": "^1.0.0",
 				"concat-map": "0.0.1"
@@ -8210,6 +8224,7 @@
 			"version": "4.1.0",
 			"resolved": "https://registry.npmjs.org/import-meta-resolve/-/import-meta-resolve-4.1.0.tgz",
 			"integrity": "sha512-I6fiaX09Xivtk+THaMfAwnA3MVA5Big1WHF1Dfx9hFuvNIWpXnorlkzhcQf6ehrqQiiZECRt1poOAkPmer3ruw==",
+			"dev": true,
 			"funding": {
 				"type": "github",
 				"url": "https://github.com/sponsors/wooorm"
@@ -9389,10 +9404,11 @@
 			}
 		},
 		"node_modules/matcher-collection/node_modules/brace-expansion": {
-			"version": "1.1.11",
-			"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
-			"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
+			"version": "1.1.12",
+			"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
+			"integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
 			"dev": true,
+			"license": "MIT",
 			"dependencies": {
 				"balanced-match": "^1.0.0",
 				"concat-map": "0.0.1"
@@ -10848,10 +10864,11 @@
 			}
 		},
 		"node_modules/quick-temp/node_modules/brace-expansion": {
-			"version": "1.1.11",
-			"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
-			"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
+			"version": "1.1.12",
+			"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
+			"integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
 			"dev": true,
+			"license": "MIT",
 			"dependencies": {
 				"balanced-match": "^1.0.0",
 				"concat-map": "0.0.1"
@@ -11076,10 +11093,11 @@
 			}
 		},
 		"node_modules/rimraf/node_modules/brace-expansion": {
-			"version": "1.1.11",
-			"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
-			"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
+			"version": "1.1.12",
+			"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
+			"integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
 			"dev": true,
+			"license": "MIT",
 			"dependencies": {
 				"balanced-match": "^1.0.0",
 				"concat-map": "0.0.1"
@@ -11264,10 +11282,11 @@
 			}
 		},
 		"node_modules/sander/node_modules/brace-expansion": {
-			"version": "1.1.11",
-			"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
-			"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
+			"version": "1.1.12",
+			"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
+			"integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
 			"dev": true,
+			"license": "MIT",
 			"dependencies": {
 				"balanced-match": "^1.0.0",
 				"concat-map": "0.0.1"
@@ -12785,9 +12804,9 @@
 			}
 		},
 		"node_modules/undici": {
-			"version": "7.3.0",
-			"resolved": "https://registry.npmjs.org/undici/-/undici-7.3.0.tgz",
-			"integrity": "sha512-Qy96NND4Dou5jKoSJ2gm8ax8AJM/Ey9o9mz7KN1bb9GP+G0l20Zw8afxTnY2f4b7hmhn/z8aC2kfArVQlAhFBw==",
+			"version": "7.11.0",
+			"resolved": "https://registry.npmjs.org/undici/-/undici-7.11.0.tgz",
+			"integrity": "sha512-heTSIac3iLhsmZhUCjyS3JQEkZELateufzZuBaVM5RHXdSBMb1LPMQf5x+FH7qjsZYDP0ttAc3nnVpUB+wYbOg==",
 			"license": "MIT",
 			"engines": {
 				"node": ">=20.18.1"
@@ -12969,9 +12988,9 @@
 			}
 		},
 		"node_modules/vite": {
-			"version": "5.4.15",
-			"resolved": "https://registry.npmjs.org/vite/-/vite-5.4.15.tgz",
-			"integrity": "sha512-6ANcZRivqL/4WtwPGTKNaosuNJr5tWiftOC7liM7G9+rMb8+oeJeyzymDu4rTN93seySBmbjSfsS3Vzr19KNtA==",
+			"version": "5.4.19",
+			"resolved": "https://registry.npmjs.org/vite/-/vite-5.4.19.tgz",
+			"integrity": "sha512-qO3aKv3HoQC8QKiNSTuUM1l9o/XX3+c+VTgLHbJWHZGeTPVAg2XwazI9UWzoxjIJCGCV2zU60uqMzjeLZuULqA==",
 			"license": "MIT",
 			"dependencies": {
 				"esbuild": "^0.21.3",
@@ -13736,10 +13755,11 @@
 			}
 		},
 		"node_modules/walk-sync/node_modules/brace-expansion": {
-			"version": "1.1.11",
-			"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
-			"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
+			"version": "1.1.12",
+			"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
+			"integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
 			"dev": true,
+			"license": "MIT",
 			"dependencies": {
 				"balanced-match": "^1.0.0",
 				"concat-map": "0.0.1"

+ 1 - 1
pyproject.toml

@@ -135,7 +135,7 @@ dependencies = [
     "gcp-storage-emulator>=2024.8.3",
 
     "moto[s3]>=5.0.26",
-
+    "oracledb>=3.2.0",
     "posthog==5.4.0",
 
 ]

+ 76 - 0
uv.lock

@@ -969,6 +969,20 @@ version = "0.8"
 source = { registry = "https://pypi.org/simple" }
 sdist = { url = "https://files.pythonhosted.org/packages/7d/7d/60ee3f2b16d9bfdfa72e8599470a2c1a5b759cb113c6fe1006be28359327/docx2txt-0.8.tar.gz", hash = "sha256:2c06d98d7cfe2d3947e5760a57d924e3ff07745b379c8737723922e7009236e5", size = 2814, upload-time = "2019-06-23T19:58:36.94Z" }
 
+[[package]]
+name = "duckduckgo-search"
+version = "8.0.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "click" },
+    { name = "lxml" },
+    { name = "primp" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/ad/c0/e18c2148d33a9d87f6a0cc00acba30b4e547be0f8cb85ccb313a6e8fbac7/duckduckgo_search-8.0.2.tar.gz", hash = "sha256:3109a99967b29cab8862823bbe320d140d5c792415de851b9d6288de2311b3ec", size = 21807, upload-time = "2025-05-15T08:43:25.311Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/bf/6c/e36d22e76f4aa4e1ea7ea9b443bd49b5ffd2f13d430840f47e35284f797a/duckduckgo_search-8.0.2-py3-none-any.whl", hash = "sha256:b5ff8b6b8f169b8e1b15a788a5749aa900ebcefd6e1ab485787582f8d5b4f1ef", size = 18184, upload-time = "2025-05-15T08:43:23.713Z" },
+]
+
 [[package]]
 name = "durationpy"
 version = "0.9"
@@ -1044,6 +1058,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059, upload-time = "2024-10-25T17:25:39.051Z" },
 ]
 
+[[package]]
+name = "eval-type-backport"
+version = "0.2.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/30/ea/8b0ac4469d4c347c6a385ff09dc3c048c2d021696664e26c7ee6791631b5/eval_type_backport-0.2.2.tar.gz", hash = "sha256:f0576b4cf01ebb5bd358d02314d31846af5e07678387486e2c798af0e7d849c1", size = 9079, upload-time = "2024-12-21T20:09:46.005Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ce/31/55cd413eaccd39125368be33c46de24a1f639f2e12349b0361b4678f3915/eval_type_backport-0.2.2-py3-none-any.whl", hash = "sha256:cb6ad7c393517f476f96d456d0412ea80f0a8cf96f6892834cd9340149111b0a", size = 5830, upload-time = "2024-12-21T20:09:44.175Z" },
+]
+
 [[package]]
 name = "events"
 version = "0.5"
@@ -1585,6 +1608,35 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/90/40/972271de05f9315c0d69f9f7ebbcadd83bc85322f538637d11bb8c67803d/grpcio_status-1.62.3-py3-none-any.whl", hash = "sha256:f9049b762ba8de6b1086789d8315846e094edac2c50beaf462338b301a8fd4b8", size = 14448, upload-time = "2024-08-06T00:30:15.702Z" },
 ]
 
+[[package]]
+name = "grpcio-tools"
+version = "1.62.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "grpcio" },
+    { name = "protobuf" },
+    { name = "setuptools" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/54/fa/b69bd8040eafc09b88bb0ec0fea59e8aacd1a801e688af087cead213b0d0/grpcio-tools-1.62.3.tar.gz", hash = "sha256:7c7136015c3d62c3eef493efabaf9e3380e3e66d24ee8e94c01cb71377f57833", size = 4538520, upload-time = "2024-08-06T00:37:11.035Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/23/52/2dfe0a46b63f5ebcd976570aa5fc62f793d5a8b169e211c6a5aede72b7ae/grpcio_tools-1.62.3-cp311-cp311-macosx_10_10_universal2.whl", hash = "sha256:703f46e0012af83a36082b5f30341113474ed0d91e36640da713355cd0ea5d23", size = 5147623, upload-time = "2024-08-06T00:30:54.894Z" },
+    { url = "https://files.pythonhosted.org/packages/f0/2e/29fdc6c034e058482e054b4a3c2432f84ff2e2765c1342d4f0aa8a5c5b9a/grpcio_tools-1.62.3-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:7cc83023acd8bc72cf74c2edbe85b52098501d5b74d8377bfa06f3e929803492", size = 2719538, upload-time = "2024-08-06T00:30:57.928Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/60/abe5deba32d9ec2c76cdf1a2f34e404c50787074a2fee6169568986273f1/grpcio_tools-1.62.3-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7ff7d58a45b75df67d25f8f144936a3e44aabd91afec833ee06826bd02b7fbe7", size = 3070964, upload-time = "2024-08-06T00:31:00.267Z" },
+    { url = "https://files.pythonhosted.org/packages/bc/ad/e2b066684c75f8d9a48508cde080a3a36618064b9cadac16d019ca511444/grpcio_tools-1.62.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f2483ea232bd72d98a6dc6d7aefd97e5bc80b15cd909b9e356d6f3e326b6e43", size = 2805003, upload-time = "2024-08-06T00:31:02.565Z" },
+    { url = "https://files.pythonhosted.org/packages/9c/3f/59bf7af786eae3f9d24ee05ce75318b87f541d0950190ecb5ffb776a1a58/grpcio_tools-1.62.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:962c84b4da0f3b14b3cdb10bc3837ebc5f136b67d919aea8d7bb3fd3df39528a", size = 3685154, upload-time = "2024-08-06T00:31:05.339Z" },
+    { url = "https://files.pythonhosted.org/packages/f1/79/4dd62478b91e27084c67b35a2316ce8a967bd8b6cb8d6ed6c86c3a0df7cb/grpcio_tools-1.62.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8ad0473af5544f89fc5a1ece8676dd03bdf160fb3230f967e05d0f4bf89620e3", size = 3297942, upload-time = "2024-08-06T00:31:08.456Z" },
+    { url = "https://files.pythonhosted.org/packages/b8/cb/86449ecc58bea056b52c0b891f26977afc8c4464d88c738f9648da941a75/grpcio_tools-1.62.3-cp311-cp311-win32.whl", hash = "sha256:db3bc9fa39afc5e4e2767da4459df82b095ef0cab2f257707be06c44a1c2c3e5", size = 910231, upload-time = "2024-08-06T00:31:11.464Z" },
+    { url = "https://files.pythonhosted.org/packages/45/a4/9736215e3945c30ab6843280b0c6e1bff502910156ea2414cd77fbf1738c/grpcio_tools-1.62.3-cp311-cp311-win_amd64.whl", hash = "sha256:e0898d412a434e768a0c7e365acabe13ff1558b767e400936e26b5b6ed1ee51f", size = 1052496, upload-time = "2024-08-06T00:31:13.665Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/a5/d6887eba415ce318ae5005e8dfac3fa74892400b54b6d37b79e8b4f14f5e/grpcio_tools-1.62.3-cp312-cp312-macosx_10_10_universal2.whl", hash = "sha256:d102b9b21c4e1e40af9a2ab3c6d41afba6bd29c0aa50ca013bf85c99cdc44ac5", size = 5147690, upload-time = "2024-08-06T00:31:16.436Z" },
+    { url = "https://files.pythonhosted.org/packages/8a/7c/3cde447a045e83ceb4b570af8afe67ffc86896a2fe7f59594dc8e5d0a645/grpcio_tools-1.62.3-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:0a52cc9444df978438b8d2332c0ca99000521895229934a59f94f37ed896b133", size = 2720538, upload-time = "2024-08-06T00:31:18.905Z" },
+    { url = "https://files.pythonhosted.org/packages/88/07/f83f2750d44ac4f06c07c37395b9c1383ef5c994745f73c6bfaf767f0944/grpcio_tools-1.62.3-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:141d028bf5762d4a97f981c501da873589df3f7e02f4c1260e1921e565b376fa", size = 3071571, upload-time = "2024-08-06T00:31:21.684Z" },
+    { url = "https://files.pythonhosted.org/packages/37/74/40175897deb61e54aca716bc2e8919155b48f33aafec8043dda9592d8768/grpcio_tools-1.62.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47a5c093ab256dec5714a7a345f8cc89315cb57c298b276fa244f37a0ba507f0", size = 2806207, upload-time = "2024-08-06T00:31:24.208Z" },
+    { url = "https://files.pythonhosted.org/packages/ec/ee/d8de915105a217cbcb9084d684abdc032030dcd887277f2ef167372287fe/grpcio_tools-1.62.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:f6831fdec2b853c9daa3358535c55eed3694325889aa714070528cf8f92d7d6d", size = 3685815, upload-time = "2024-08-06T00:31:26.917Z" },
+    { url = "https://files.pythonhosted.org/packages/fd/d9/4360a6c12be3d7521b0b8c39e5d3801d622fbb81cc2721dbd3eee31e28c8/grpcio_tools-1.62.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e02d7c1a02e3814c94ba0cfe43d93e872c758bd8fd5c2797f894d0c49b4a1dfc", size = 3298378, upload-time = "2024-08-06T00:31:30.401Z" },
+    { url = "https://files.pythonhosted.org/packages/29/3b/7cdf4a9e5a3e0a35a528b48b111355cd14da601413a4f887aa99b6da468f/grpcio_tools-1.62.3-cp312-cp312-win32.whl", hash = "sha256:b881fd9505a84457e9f7e99362eeedd86497b659030cf57c6f0070df6d9c2b9b", size = 910416, upload-time = "2024-08-06T00:31:33.118Z" },
+    { url = "https://files.pythonhosted.org/packages/6c/66/dd3ec249e44c1cc15e902e783747819ed41ead1336fcba72bf841f72c6e9/grpcio_tools-1.62.3-cp312-cp312-win_amd64.whl", hash = "sha256:11c625eebefd1fd40a228fc8bae385e448c7e32a6ae134e43cf13bbc23f902b7", size = 1052856, upload-time = "2024-08-06T00:31:36.519Z" },
+]
+
 [[package]]
 name = "h11"
 version = "0.14.0"
@@ -2665,6 +2717,7 @@ dependencies = [
     { name = "opencv-python-headless" },
     { name = "openpyxl" },
     { name = "opensearch-py" },
+    { name = "oracledb" },
     { name = "pandas" },
     { name = "passlib", extra = ["bcrypt"] },
     { name = "peewee" },
@@ -2744,6 +2797,7 @@ requires-dist = [
     { name = "ddgs", specifier = "==9.0.0" },
     { name = "docker", specifier = "~=7.1.0" },
     { name = "docx2txt", specifier = "==0.8" },
+    { name = "duckduckgo-search", specifier = "==8.0.2" },
     { name = "einops", specifier = "==0.8.1" },
     { name = "elasticsearch", specifier = "==9.0.1" },
     { name = "fake-useragent", specifier = "==2.1.0" },
@@ -2774,6 +2828,7 @@ requires-dist = [
     { name = "opencv-python-headless", specifier = "==4.11.0.86" },
     { name = "openpyxl", specifier = "==3.1.5" },
     { name = "opensearch-py", specifier = "==2.8.0" },
+    { name = "oracledb", specifier = ">=3.2.0" },
     { name = "pandas", specifier = "==2.2.3" },
     { name = "passlib", extras = ["bcrypt"], specifier = "==1.7.4" },
     { name = "peewee", specifier = "==3.18.1" },
@@ -3032,6 +3087,27 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/64/8a/9e1b54f50d1fddebbeac9a9b0632f8db6ece7add904fb593ee2e268ee4de/opentelemetry_util_http-0.50b0-py3-none-any.whl", hash = "sha256:21f8aedac861ffa3b850f8c0a6c373026189eb8630ac6e14a2bf8c55695cc090", size = 6942, upload-time = "2024-12-11T17:05:13.342Z" },
 ]
 
+[[package]]
+name = "oracledb"
+version = "3.2.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "cryptography" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/9d/2d/8336527f248dbeb7183b7b4bfafe3499119c467236f8916ce0ec3b8ef2b6/oracledb-3.2.0.tar.gz", hash = "sha256:9bf9f1c93e53142b33d1c5ebf5ababeebd2062a01d5ead68bbb640439ecf2223", size = 872574, upload-time = "2025-06-26T21:57:13.001Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/4c/02/8279be85defe274a5dbc565c27f927d7d772117c070008853d75d4d780a8/oracledb-3.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:42e936190c5c76b115e2803e9c51a416a895c31ca551bc154b8c52121a84ee78", size = 3985060, upload-time = "2025-06-26T21:57:30.753Z" },
+    { url = "https://files.pythonhosted.org/packages/38/a2/ce33fca6a15b3357d00c92bffcfa614b2d9f008d8e997f57e0302c760fb6/oracledb-3.2.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a87a331df19a84cad3d50a99cd505f8d0ec500e5e22438a025bd66e037b20715", size = 2507408, upload-time = "2025-06-26T21:57:33.34Z" },
+    { url = "https://files.pythonhosted.org/packages/17/1b/dcaeb174ca777f28e05ac3d862cb468ce4b0a9c36900aa5f12ecbf29029f/oracledb-3.2.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b62f0c018e437b91254f019715a5fd5daafcf3182afee86ccd9d0f3dac75c0a7", size = 2687910, upload-time = "2025-06-26T21:57:36.173Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/f6/a575191c027b89486947597a7b6ae9c2abbc05eae2b79de037a6039ae455/oracledb-3.2.0-cp311-cp311-win32.whl", hash = "sha256:01477e65f129f8927ef64cd4b48d68646fc0115e1e95484dac5b82dd9da7b98b", size = 1548061, upload-time = "2025-06-26T21:57:38.273Z" },
+    { url = "https://files.pythonhosted.org/packages/4e/ec/a9b5488858d3b45213ce03f442c5cdf5aef356269071b84caecadb326e29/oracledb-3.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:a1dc529f06a95ca3560d233609c266c17e828a08a70b8a434e2c3fe7000825eb", size = 1891446, upload-time = "2025-06-26T21:57:40.737Z" },
+    { url = "https://files.pythonhosted.org/packages/56/d1/04632c2fab7c9ab91c68630eb221e17019e74d5b023badc0c191e83119cf/oracledb-3.2.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:1c2658983520b460776e74c75bb50e51a78e8ab743b64adc844a26a3a8a0bc7c", size = 3959613, upload-time = "2025-06-26T21:57:43.735Z" },
+    { url = "https://files.pythonhosted.org/packages/7a/de/ebb82b6193583d0c7f13f908756d44bd2c03207b501457773b0793018231/oracledb-3.2.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0c7a599ffe3238824951948992ab6b4532a0c1d4b33900d412f738a7da476d47", size = 2336800, upload-time = "2025-06-26T21:57:45.997Z" },
+    { url = "https://files.pythonhosted.org/packages/1e/f5/f81f72fac3cfb52fc18965d4d07d76e26103c2cc60641a796b9618904b54/oracledb-3.2.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9388ad0e09dc4030cd62779acc0ae4e9dfd338da7d30c72768fb0589461485ff", size = 2509427, upload-time = "2025-06-26T21:57:48.686Z" },
+    { url = "https://files.pythonhosted.org/packages/02/ad/ded50edb5010b27173f203421f69f90e5f00acaf5686ccd4f7963e1c695b/oracledb-3.2.0-cp312-cp312-win32.whl", hash = "sha256:94ac95e52e6f4a9394408aba6cc5f90581219ecb872d450b2a80df9aa3cc4216", size = 1511775, upload-time = "2025-06-26T21:57:50.474Z" },
+    { url = "https://files.pythonhosted.org/packages/5f/75/684d2e18d57d72abf85366e8e1f61aaa2e6b15c71bf95f5bb6e9d7e0c9a5/oracledb-3.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:a367604d091ee82c9e3f6d97e34b8ec919646209bda78337f6156b76252dd024", size = 1850296, upload-time = "2025-06-26T21:57:52.649Z" },
+]
+
 [[package]]
 name = "orjson"
 version = "3.10.14"