oracle23ai.py 31 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839
  1. """
  2. # ORACLE23AI (Oracle23ai Vector Search) : env.examples
  3. VECTOR_DB = "oracle23ai"
  4. ## DBCS or oracle 23ai free
  5. ORACLE_DB_USE_WALLET = false
  6. ORACLE_DB_USER = "DEMOUSER"
  7. ORACLE_DB_PASSWORD = "Welcome123456"
  8. ORACLE_DB_DSN = "localhost:1521/FREEPDB1"
  9. ## ADW or ATP
  10. # ORACLE_DB_USE_WALLET = true
  11. # ORACLE_DB_USER = "DEMOUSER"
  12. # ORACLE_DB_PASSWORD = "Welcome123456"
  13. # ORACLE_DB_DSN = "medium"
  14. # ORACLE_DB_DSN = "(description= (retry_count=3)(retry_delay=3)(address=(protocol=tcps)(port=1522)(host=xx.oraclecloud.com))(connect_data=(service_name=yy.adb.oraclecloud.com))(security=(ssl_server_dn_match=no)))"
  15. # ORACLE_WALLET_DIR = "/home/opc/adb_wallet"
  16. # ORACLE_WALLET_PASSWORD = "Welcome1"
  17. ORACLE_VECTOR_LENGTH = 768
  18. ORACLE_DB_POOL_MIN = 2
  19. ORACLE_DB_POOL_MAX = 10
  20. ORACLE_DB_POOL_INCREMENT = 1
  21. """
  22. from typing import Optional, List, Dict, Any
  23. from decimal import Decimal
  24. import logging
  25. import os
  26. import oracledb
  27. from open_webui.retrieval.vector.main import (
  28. VectorDBBase,
  29. VectorItem,
  30. SearchResult,
  31. GetResult,
  32. )
  33. from open_webui.config import (
  34. ORACLE_DB_USE_WALLET,
  35. ORACLE_DB_USER,
  36. ORACLE_DB_PASSWORD,
  37. ORACLE_DB_DSN,
  38. ORACLE_WALLET_DIR,
  39. ORACLE_WALLET_PASSWORD,
  40. ORACLE_VECTOR_LENGTH,
  41. ORACLE_DB_POOL_MIN,
  42. ORACLE_DB_POOL_MAX,
  43. ORACLE_DB_POOL_INCREMENT,
  44. )
  45. from open_webui.env import SRC_LOG_LEVELS
  46. log = logging.getLogger(__name__)
  47. log.setLevel(SRC_LOG_LEVELS["RAG"])
  48. # ORACLE_DB_USE_WALLET = os.environ.get("ORACLE_DB_USE_WALLET", "DBCS")
  49. # ORACLE_DB_USER = os.environ.get("ORACLE_DB_USER", "DEMOUSER")
  50. # ORACLE_DB_PASSWORD = os.environ.get("ORACLE_DB_PASSWORD", "Welcome123456")
  51. # ORACLE_DB_DSN = os.environ.get("ORACLE_DB_DSN", "medium")
  52. # ORACLE_DB_DSN = os.environ.get("ORACLE_DB_DSN", "(description= (retry_count=3)(retry_delay=3)(address=(protocol=tcps)(port=1522)(host=d6aqmjs6.adb.us-chicago-1.oraclecloud.com))(connect_data=(service_name=g13fc7c96b5ee55_agentvs_medium.adb.oraclecloud.com))(security=(ssl_server_dn_match=no)))")
  53. class Oracle23aiClient(VectorDBBase):
  54. """
  55. Oracle Vector Database Client for vector similarity search using Oracle Database 23ai.
  56. This client provides an interface to store, retrieve, and search vector embeddings
  57. in an Oracle database. It uses connection pooling for efficient database access
  58. and supports vector similarity search operations.
  59. Attributes:
  60. pool: Connection pool for Oracle database connections
  61. """
  62. def __init__(self) -> None:
  63. """
  64. Initialize the Oracle23aiClient with a connection pool.
  65. Creates a connection pool with min=2 and max=10 connections, initializes
  66. the database schema if needed, and sets up necessary tables and indexes.
  67. Args:
  68. db_type (str): Database type - "ADB" for Autonomous Database or "DBCS" for Database Cloud Service
  69. Raises:
  70. ValueError: If required configuration parameters are missing
  71. Exception: If database initialization fails
  72. """
  73. try:
  74. # Create the appropriate connection pool based on DB type
  75. if ORACLE_DB_USE_WALLET:
  76. self._create_adb_pool()
  77. else: # DBCS
  78. self._create_dbcs_pool()
  79. dsn = ORACLE_DB_DSN
  80. log.info(f" >>> Creating Connection Pool [{ORACLE_DB_USER}:**@{dsn}]")
  81. with self.get_connection() as connection:
  82. log.info("Connection version:", connection.version)
  83. self._initialize_database(connection)
  84. log.info("Oracle Vector Search initialization complete.")
  85. except Exception as e:
  86. log.exception(f"Error during Oracle Vector Search initialization: {e}")
  87. raise
  88. def _create_adb_pool(self) -> None:
  89. """
  90. Create connection pool for Oracle Autonomous Database.
  91. Uses wallet-based authentication.
  92. """
  93. self.pool = oracledb.create_pool(
  94. user=ORACLE_DB_USER,
  95. password=ORACLE_DB_PASSWORD,
  96. dsn=ORACLE_DB_DSN,
  97. min=ORACLE_DB_POOL_MIN,
  98. max=ORACLE_DB_POOL_MAX,
  99. increment=ORACLE_DB_POOL_INCREMENT,
  100. config_dir=ORACLE_WALLET_DIR,
  101. wallet_location=ORACLE_WALLET_DIR,
  102. wallet_password=ORACLE_WALLET_PASSWORD
  103. )
  104. log.info(f"Created ADB connection pool with wallet authentication.")
  105. def _create_dbcs_pool(self) -> None:
  106. """
  107. Create connection pool for Oracle Database Cloud Service.
  108. Uses basic authentication without wallet.
  109. """
  110. self.pool = oracledb.create_pool(
  111. user=ORACLE_DB_USER,
  112. password=ORACLE_DB_PASSWORD,
  113. dsn=ORACLE_DB_DSN,
  114. min=ORACLE_DB_POOL_MIN,
  115. max=ORACLE_DB_POOL_MAX,
  116. increment=ORACLE_DB_POOL_INCREMENT
  117. )
  118. log.info("Created DB connection pool with basic authentication.")
  119. def get_connection(self):
  120. """
  121. Acquire a connection from the connection pool with retry logic.
  122. Returns:
  123. connection: A database connection with output type handler configured
  124. """
  125. max_retries = 3
  126. for attempt in range(max_retries):
  127. try:
  128. connection = self.pool.acquire()
  129. connection.outputtypehandler = self._output_type_handler
  130. return connection
  131. except oracledb.DatabaseError as e:
  132. error_obj, = e.args
  133. log.exception(f"Connection attempt {attempt + 1} failed: {error_obj.message}")
  134. if attempt < max_retries - 1:
  135. import time
  136. wait_time = 2 ** attempt
  137. log.info(f"Retrying in {wait_time} seconds...")
  138. time.sleep(wait_time)
  139. else:
  140. raise
  141. def start_health_monitor(self, interval_seconds: int = 60):
  142. """
  143. Start a background thread to periodically check the health of the connection pool.
  144. Args:
  145. interval_seconds (int): Number of seconds between health checks
  146. """
  147. def _monitor():
  148. while True:
  149. try:
  150. log.info("[HealthCheck] Running periodic DB health check...")
  151. self.ensure_connection()
  152. log.info("[HealthCheck] Connection is healthy.")
  153. except Exception as e:
  154. log.exception(f"[HealthCheck] Connection health check failed: {e}")
  155. time.sleep(interval_seconds)
  156. thread = threading.Thread(target=_monitor, daemon=True)
  157. thread.start()
  158. log.info(f"Started DB health monitor every {interval_seconds} seconds.")
  159. def _reconnect_pool(self):
  160. """
  161. Attempt to reinitialize the connection pool if it's been closed or broken.
  162. Args:
  163. db_type (str): Database type - "ADB" for Autonomous Database or "DBCS" for Database Cloud Service
  164. """
  165. try:
  166. log.info("Attempting to reinitialize the Oracle connection pool...")
  167. # Re-create the appropriate connection pool based on DB type
  168. if ORACLE_DB_USE_WALLET:
  169. self._create_adb_pool()
  170. else: # DBCS
  171. self._create_dbcs_pool()
  172. log.info("Connection pool reinitialized.")
  173. except Exception as e:
  174. log.exception(f"Failed to reinitialize the connection pool: {e}")
  175. raise
  176. def ensure_connection(self):
  177. """
  178. Ensure the database connection is alive, reconnecting pool if needed.
  179. """
  180. try:
  181. with self.get_connection() as connection:
  182. with connection.cursor() as cursor:
  183. cursor.execute("SELECT 1 FROM dual")
  184. except Exception as e:
  185. log.exception(f"Connection check failed: {e}, attempting to reconnect pool...")
  186. self._reconnect_pool()
  187. def _output_type_handler(self, cursor, metadata):
  188. """
  189. Handle Oracle vector type conversion.
  190. Args:
  191. cursor: Oracle database cursor
  192. metadata: Metadata for the column
  193. Returns:
  194. A variable with appropriate conversion for vector types
  195. """
  196. if metadata.type_code is oracledb.DB_TYPE_VECTOR:
  197. return cursor.var(metadata.type_code, arraysize=cursor.arraysize,
  198. outconverter=list)
  199. # Rest of the Oracle23aiClient class remains unchanged...
  200. def _initialize_database(self, connection) -> None:
  201. """
  202. Initialize database schema, tables and indexes.
  203. Creates the document_chunk table and necessary indexes if they don't exist.
  204. Args:
  205. connection: Oracle database connection
  206. Raises:
  207. Exception: If schema initialization fails
  208. """
  209. with connection.cursor() as cursor:
  210. log.info(f" >>> Creating Table document_chunk")
  211. cursor.execute(f"""
  212. BEGIN
  213. EXECUTE IMMEDIATE '
  214. CREATE TABLE IF NOT EXISTS document_chunk (
  215. id VARCHAR2(255) PRIMARY KEY,
  216. collection_name VARCHAR2(255) NOT NULL,
  217. text CLOB,
  218. vmetadata JSON,
  219. vector vector(*, float32)
  220. )
  221. ';
  222. EXCEPTION
  223. WHEN OTHERS THEN
  224. IF SQLCODE != -955 THEN
  225. RAISE;
  226. END IF;
  227. END;
  228. """)
  229. log.info(f" >>> Creating Table document_chunk_collection_name_idx")
  230. cursor.execute("""
  231. BEGIN
  232. EXECUTE IMMEDIATE '
  233. CREATE INDEX IF NOT exists document_chunk_collection_name_idx
  234. ON document_chunk (collection_name)
  235. ';
  236. EXCEPTION
  237. WHEN OTHERS THEN
  238. IF SQLCODE != -955 THEN
  239. RAISE;
  240. END IF;
  241. END;
  242. """)
  243. log.info(f" >>> Creating VECTOR INDEX document_chunk_vector_ivf_idx")
  244. cursor.execute("""
  245. BEGIN
  246. EXECUTE IMMEDIATE '
  247. create vector index IF NOT EXISTS document_chunk_vector_ivf_idx on document_chunk(vector)
  248. organization neighbor partitions
  249. distance cosine
  250. with target accuracy 95
  251. PARAMETERS (type IVF, NEIGHBOR PARTITIONS 100)
  252. ';
  253. EXCEPTION
  254. WHEN OTHERS THEN
  255. IF SQLCODE != -955 THEN
  256. RAISE;
  257. END IF;
  258. END;
  259. """)
  260. connection.commit()
  261. def check_vector_length(self) -> None:
  262. """
  263. Check vector length compatibility (placeholder).
  264. This method would check if the configured vector length matches the database schema.
  265. Currently implemented as a placeholder.
  266. """
  267. pass
  268. def _vector_to_blob(self, vector: List[float]) -> bytes:
  269. """
  270. Convert a vector to Oracle BLOB format.
  271. Args:
  272. vector (List[float]): The vector to convert
  273. Returns:
  274. bytes: The vector in Oracle BLOB format
  275. """
  276. import array
  277. return array.array("f", vector)
  278. def adjust_vector_length(self, vector: List[float]) -> List[float]:
  279. """
  280. Adjust vector to the expected length if needed.
  281. Args:
  282. vector (List[float]): The vector to adjust
  283. Returns:
  284. List[float]: The adjusted vector
  285. """
  286. return vector
  287. def _decimal_handler(self, obj):
  288. """
  289. Handle Decimal objects for JSON serialization.
  290. Args:
  291. obj: Object to serialize
  292. Returns:
  293. float: Converted decimal value
  294. Raises:
  295. TypeError: If object is not JSON serializable
  296. """
  297. if isinstance(obj, Decimal):
  298. return float(obj)
  299. raise TypeError(f"{obj} is not JSON serializable")
  300. def _metadata_to_json(self, metadata: Dict) -> str:
  301. """
  302. Convert metadata dictionary to JSON string.
  303. Args:
  304. metadata (Dict): Metadata dictionary
  305. Returns:
  306. str: JSON representation of metadata
  307. """
  308. import json
  309. return json.dumps(metadata, default=self._decimal_handler) if metadata else "{}"
  310. def _json_to_metadata(self, json_str: str) -> Dict:
  311. """
  312. Convert JSON string to metadata dictionary.
  313. Args:
  314. json_str (str): JSON string
  315. Returns:
  316. Dict: Metadata dictionary
  317. """
  318. import json
  319. return json.loads(json_str) if json_str else {}
  320. def insert(self, collection_name: str, items: List[VectorItem]) -> None:
  321. """
  322. Insert vector items into the database.
  323. Args:
  324. collection_name (str): Name of the collection
  325. items (List[VectorItem]): List of vector items to insert
  326. Raises:
  327. Exception: If insertion fails
  328. Example:
  329. >>> client = Oracle23aiClient()
  330. >>> items = [
  331. ... {"id": "1", "text": "Sample text", "vector": [0.1, 0.2, ...], "metadata": {"source": "doc1"}},
  332. ... {"id": "2", "text": "Another text", "vector": [0.3, 0.4, ...], "metadata": {"source": "doc2"}}
  333. ... ]
  334. >>> client.insert("my_collection", items)
  335. """
  336. log.info(f"Oracle23aiClient:Inserting {len(items)} items into collection '{collection_name}'.")
  337. with self.get_connection() as connection:
  338. try:
  339. with connection.cursor() as cursor:
  340. for item in items:
  341. vector_blob = self._vector_to_blob(item["vector"])
  342. metadata_json = self._metadata_to_json(item["metadata"])
  343. cursor.execute("""
  344. INSERT INTO document_chunk
  345. (id, collection_name, text, vmetadata, vector)
  346. VALUES (:id, :collection_name, :text, :metadata, :vector)
  347. """, {
  348. 'id': item["id"],
  349. 'collection_name': collection_name,
  350. 'text': item["text"],
  351. 'metadata': metadata_json,
  352. 'vector': vector_blob
  353. })
  354. connection.commit()
  355. log.info(f"Oracle23aiClient:Inserted {len(items)} items into collection '{collection_name}'.")
  356. except Exception as e:
  357. connection.rollback()
  358. log.exception(f"Error during insert: {e}")
  359. raise
  360. def upsert(self, collection_name: str, items: List[VectorItem]) -> None:
  361. """
  362. Update or insert vector items into the database.
  363. If an item with the same ID exists, it will be updated;
  364. otherwise, it will be inserted.
  365. Args:
  366. collection_name (str): Name of the collection
  367. items (List[VectorItem]): List of vector items to upsert
  368. Raises:
  369. Exception: If upsert operation fails
  370. Example:
  371. >>> client = Oracle23aiClient()
  372. >>> items = [
  373. ... {"id": "1", "text": "Updated text", "vector": [0.1, 0.2, ...], "metadata": {"source": "doc1"}},
  374. ... {"id": "3", "text": "New item", "vector": [0.5, 0.6, ...], "metadata": {"source": "doc3"}}
  375. ... ]
  376. >>> client.upsert("my_collection", items)
  377. """
  378. with self.get_connection() as connection:
  379. try:
  380. with connection.cursor() as cursor:
  381. for item in items:
  382. vector_blob = self._vector_to_blob(item["vector"])
  383. metadata_json = self._metadata_to_json(item["metadata"])
  384. cursor.execute("""
  385. MERGE INTO document_chunk d
  386. USING (SELECT :id as id FROM dual) s
  387. ON (d.id = s.id)
  388. WHEN MATCHED THEN
  389. UPDATE SET
  390. collection_name = :collection_name,
  391. text = :text,
  392. vmetadata = :metadata,
  393. vector = :vector
  394. WHEN NOT MATCHED THEN
  395. INSERT (id, collection_name, text, vmetadata, vector)
  396. VALUES (:id, :collection_name, :text, :metadata, :vector)
  397. """, {
  398. 'id': item["id"],
  399. 'collection_name': collection_name,
  400. 'text': item["text"],
  401. 'metadata': metadata_json,
  402. 'vector': vector_blob,
  403. 'id': item["id"],
  404. 'collection_name': collection_name,
  405. 'text': item["text"],
  406. 'metadata': metadata_json,
  407. 'vector': vector_blob
  408. })
  409. connection.commit()
  410. log.info(f"Upserted {len(items)} items into collection '{collection_name}'.")
  411. except Exception as e:
  412. connection.rollback()
  413. log.exception(f"Error during upsert: {e}")
  414. raise
  415. def search(
  416. self,
  417. collection_name: str,
  418. vectors: List[List[float]],
  419. limit: Optional[int] = None
  420. ) -> Optional[SearchResult]:
  421. """
  422. Search for similar vectors in the database.
  423. Performs vector similarity search using cosine distance.
  424. Args:
  425. collection_name (str): Name of the collection to search
  426. vectors (List[List[float]]): Query vectors to find similar items for
  427. limit (Optional[int]): Maximum number of results to return per query
  428. Returns:
  429. Optional[SearchResult]: Search results containing ids, distances, documents, and metadata
  430. Example:
  431. >>> client = Oracle23aiClient()
  432. >>> query_vector = [0.1, 0.2, 0.3, ...] # Must match VECTOR_LENGTH
  433. >>> results = client.search("my_collection", [query_vector], limit=5)
  434. >>> if results:
  435. ... print(f"Found {len(results.ids[0])} matches")
  436. ... for i, (id, dist) in enumerate(zip(results.ids[0], results.distances[0])):
  437. ... print(f"Match {i+1}: id={id}, distance={dist}")
  438. """
  439. print(f"Oracle23aiClient:Searching items from collection '{collection_name}'.")
  440. try:
  441. if not vectors:
  442. return None
  443. limit = limit or 10
  444. num_queries = len(vectors)
  445. ids = [[] for _ in range(num_queries)]
  446. distances = [[] for _ in range(num_queries)]
  447. documents = [[] for _ in range(num_queries)]
  448. metadatas = [[] for _ in range(num_queries)]
  449. with self.get_connection() as connection:
  450. with connection.cursor() as cursor:
  451. for qid, vector in enumerate(vectors):
  452. vector_blob = self._vector_to_blob(vector)
  453. cursor.execute("""
  454. SELECT dc.id, dc.text,
  455. JSON_SERIALIZE(dc.vmetadata) as vmetadata,
  456. VECTOR_DISTANCE(dc.vector, :query_vector, COSINE) as distance
  457. FROM document_chunk dc
  458. WHERE dc.collection_name = :collection_name
  459. ORDER BY VECTOR_DISTANCE(dc.vector, :query_vector, COSINE)
  460. FETCH APPROX FIRST :limit ROWS ONLY
  461. """, {
  462. 'query_vector': vector_blob,
  463. 'collection_name': collection_name,
  464. 'limit': limit
  465. })
  466. results = cursor.fetchall()
  467. for row in results:
  468. ids[qid].append(row[0])
  469. documents[qid].append(row[1].read() if isinstance(row[1], oracledb.LOB) else str(row[1]))
  470. metadatas[qid].append(row[2].read() if isinstance(row[2], oracledb.LOB) else row[2])
  471. distances[qid].append(float(row[3]))
  472. return SearchResult(
  473. ids=ids,
  474. distances=distances,
  475. documents=documents,
  476. metadatas=metadatas
  477. )
  478. except Exception as e:
  479. log.exception(f"Error during search: {e}")
  480. import traceback
  481. log.exception(traceback.format_exc())
  482. return None
  483. def query(
  484. self,
  485. collection_name: str,
  486. filter: Dict[str, Any],
  487. limit: Optional[int] = None
  488. ) -> Optional[GetResult]:
  489. """
  490. Query items based on metadata filters.
  491. Retrieves items that match specified metadata criteria.
  492. Args:
  493. collection_name (str): Name of the collection to query
  494. filter (Dict[str, Any]): Metadata filters to apply
  495. limit (Optional[int]): Maximum number of results to return
  496. Returns:
  497. Optional[GetResult]: Query results containing ids, documents, and metadata
  498. Example:
  499. >>> client = Oracle23aiClient()
  500. >>> filter = {"source": "doc1", "category": "finance"}
  501. >>> results = client.query("my_collection", filter, limit=20)
  502. >>> if results:
  503. ... print(f"Found {len(results.ids[0])} matching documents")
  504. """
  505. print(f"Oracle23aiClient:Querying items from collection '{collection_name}'.")
  506. try:
  507. limit = limit or 100
  508. query = """
  509. SELECT id, text, vmetadata
  510. FROM document_chunk
  511. WHERE collection_name = :collection_name
  512. """
  513. params = {'collection_name': collection_name}
  514. for i, (key, value) in enumerate(filter.items()):
  515. param_name = f"value_{i}"
  516. query += f" AND JSON_VALUE(vmetadata, '$.{key}' RETURNING VARCHAR2(4096)) = :{param_name}"
  517. params[param_name] = str(value)
  518. query += " FETCH FIRST :limit ROWS ONLY"
  519. params['limit'] = limit
  520. with self.get_connection() as connection:
  521. with connection.cursor() as cursor:
  522. cursor.execute(query, params)
  523. results = cursor.fetchall()
  524. if not results:
  525. return None
  526. ids = [[row[0] for row in results]]
  527. documents = [[row[1].read() if isinstance(row[1], oracledb.LOB) else str(row[1]) for row in results]]
  528. metadatas = [[row[2].read() if isinstance(row[2], oracledb.LOB) else row[2] for row in results]]
  529. return GetResult(
  530. ids=ids,
  531. documents=documents,
  532. metadatas=metadatas
  533. )
  534. except Exception as e:
  535. log.exception(f"Error during query: {e}")
  536. import traceback
  537. log.exception(traceback.format_exc())
  538. return None
  539. def get(
  540. self,
  541. collection_name: str,
  542. limit: Optional[int] = None
  543. ) -> Optional[GetResult]:
  544. """
  545. Get all items in a collection.
  546. Retrieves items from a specified collection up to the limit.
  547. Args:
  548. collection_name (str): Name of the collection to retrieve
  549. limit (Optional[int]): Maximum number of items to retrieve
  550. Returns:
  551. Optional[GetResult]: Result containing ids, documents, and metadata
  552. Example:
  553. >>> client = Oracle23aiClient()
  554. >>> results = client.get("my_collection", limit=50)
  555. >>> if results:
  556. ... print(f"Retrieved {len(results.ids[0])} documents from collection")
  557. """
  558. try:
  559. limit = limit or 100
  560. with self.get_connection() as connection:
  561. with connection.cursor() as cursor:
  562. cursor.execute("""
  563. SELECT /*+ MONITOR */ id, text, vmetadata
  564. FROM document_chunk
  565. WHERE collection_name = :collection_name
  566. FETCH FIRST :limit ROWS ONLY
  567. """, {
  568. 'collection_name': collection_name,
  569. 'limit': limit
  570. })
  571. results = cursor.fetchall()
  572. if not results:
  573. return None
  574. ids = [[row[0] for row in results]]
  575. documents = [[row[1].read() if isinstance(row[1], oracledb.LOB) else str(row[1]) for row in results]]
  576. metadatas = [[row[2].read() if isinstance(row[2], oracledb.LOB) else row[2] for row in results]]
  577. return GetResult(
  578. ids=ids,
  579. documents=documents,
  580. metadatas=metadatas
  581. )
  582. except Exception as e:
  583. log.exception(f"Error during get: {e}")
  584. import traceback
  585. log.exception(traceback.format_exc())
  586. return None
  587. def delete(
  588. self,
  589. collection_name: str,
  590. ids: Optional[List[str]] = None,
  591. filter: Optional[Dict[str, Any]] = None,
  592. ) -> None:
  593. """
  594. Delete items from the database.
  595. Deletes items from a collection based on IDs or metadata filters.
  596. Args:
  597. collection_name (str): Name of the collection to delete from
  598. ids (Optional[List[str]]): Specific item IDs to delete
  599. filter (Optional[Dict[str, Any]]): Metadata filters for deletion
  600. Raises:
  601. Exception: If deletion fails
  602. Example:
  603. >>> client = Oracle23aiClient()
  604. >>> # Delete specific items by ID
  605. >>> client.delete("my_collection", ids=["1", "3", "5"])
  606. >>> # Or delete by metadata filter
  607. >>> client.delete("my_collection", filter={"source": "deprecated_source"})
  608. """
  609. try:
  610. query = "DELETE FROM document_chunk WHERE collection_name = :collection_name"
  611. params = {'collection_name': collection_name}
  612. if ids:
  613. id_list = ",".join([f"'{id}'" for id in ids])
  614. query += f" AND id IN ({id_list})"
  615. if filter:
  616. for i, (key, value) in enumerate(filter.items()):
  617. param_name = f"value_{i}"
  618. query += f" AND JSON_VALUE(vmetadata, '$.{key}' RETURNING VARCHAR2(4096)) = :{param_name}"
  619. params[param_name] = str(value)
  620. with self.get_connection() as connection:
  621. with connection.cursor() as cursor:
  622. cursor.execute(query, params)
  623. deleted = cursor.rowcount
  624. connection.commit()
  625. log.info(f"Deleted {deleted} items from collection '{collection_name}'.")
  626. except Exception as e:
  627. log.exception(f"Error during delete: {e}")
  628. raise
  629. def reset(self) -> None:
  630. """
  631. Reset the database by deleting all items.
  632. Deletes all items from the document_chunk table.
  633. Raises:
  634. Exception: If reset fails
  635. Example:
  636. >>> client = Oracle23aiClient()
  637. >>> client.reset() # Warning: Removes all data!
  638. """
  639. try:
  640. with self.get_connection() as connection:
  641. with connection.cursor() as cursor:
  642. cursor.execute("DELETE FROM document_chunk")
  643. deleted = cursor.rowcount
  644. connection.commit()
  645. log.info(f"Reset complete. Deleted {deleted} items from 'document_chunk' table.")
  646. except Exception as e:
  647. log.exception(f"Error during reset: {e}")
  648. raise
  649. def close(self) -> None:
  650. """
  651. Close the database connection pool.
  652. Properly closes the connection pool and releases all resources.
  653. Example:
  654. >>> client = Oracle23aiClient()
  655. >>> # After finishing all operations
  656. >>> client.close()
  657. """
  658. try:
  659. if hasattr(self, 'pool') and self.pool:
  660. self.pool.close()
  661. print("Oracle Vector Search connection pool closed.")
  662. except Exception as e:
  663. print(f"Error closing connection pool: {e}")
  664. def has_collection(self, collection_name: str) -> bool:
  665. """
  666. Check if a collection exists.
  667. Args:
  668. collection_name (str): Name of the collection to check
  669. Returns:
  670. bool: True if the collection exists, False otherwise
  671. Example:
  672. >>> client = Oracle23aiClient()
  673. >>> if client.has_collection("my_collection"):
  674. ... print("Collection exists!")
  675. ... else:
  676. ... print("Collection does not exist.")
  677. """
  678. try:
  679. with self.get_connection() as connection:
  680. with connection.cursor() as cursor:
  681. cursor.execute("""
  682. SELECT COUNT(*)
  683. FROM document_chunk
  684. WHERE collection_name = :collection_name
  685. FETCH FIRST 1 ROWS ONLY
  686. """, {'collection_name': collection_name})
  687. count = cursor.fetchone()[0]
  688. return count > 0
  689. except Exception as e:
  690. log.exception(f"Error checking collection existence: {e}")
  691. return False
  692. def delete_collection(self, collection_name: str) -> None:
  693. """
  694. Delete an entire collection.
  695. Removes all items belonging to the specified collection.
  696. Args:
  697. collection_name (str): Name of the collection to delete
  698. Example:
  699. >>> client = Oracle23aiClient()
  700. >>> client.delete_collection("obsolete_collection")
  701. """
  702. self.delete(collection_name)
  703. log.info(f"Collection '{collection_name}' deleted.")