| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128 |
- import logging
- from typing import Optional
- from open_webui.retrieval.web.main import SearchResult, get_filtered_results
- from open_webui.env import SRC_LOG_LEVELS
- log = logging.getLogger(__name__)
- log.setLevel(SRC_LOG_LEVELS["RAG"])
- """
- Azure AI Search integration for Open WebUI.
- Documentation: https://learn.microsoft.com/en-us/python/api/overview/azure/search-documents-readme?view=azure-python
- Required package: azure-search-documents
- Install: pip install azure-search-documents
- """
# Candidate field names, in priority order. Azure AI Search indexes have
# user-defined schemas, so the document fields holding the URL, title and
# body text are guessed from commonly used names.
_LINK_FIELDS = ("url", "link", "uri", "metadata_storage_path")
_TITLE_FIELDS = ("title", "name", "metadata_title", "metadata_storage_name")
_SNIPPET_FIELDS = ("content", "snippet", "description", "summary", "text")

# Snippets longer than this are cut and suffixed with an ellipsis so the
# final string is exactly this many characters long.
_MAX_SNIPPET_LENGTH = 500
_ELLIPSIS = "..."


def _first_truthy(document: dict, field_names: tuple):
    """Return the first truthy value stored under *field_names*, else None."""
    for field_name in field_names:
        value = document.get(field_name)
        if value:
            return value
    return None


def _as_text(value) -> Optional[str]:
    """Coerce an index field value to text; None stays None.

    Index fields are not guaranteed to be strings (a field may hold a
    collection or a number), and the rest of the pipeline slices and
    concatenates the value as a string.
    """
    if value is None or isinstance(value, str):
        return value
    if isinstance(value, (list, tuple)):
        return " ".join(str(item) for item in value)
    return str(value)


def search_azure(
    api_key: str,
    endpoint: str,
    index_name: str,
    query: str,
    count: int,
    filter_list: Optional[list[str]] = None,
) -> list[SearchResult]:
    """
    Search using Azure AI Search.

    Args:
        api_key: Azure Search API key (query key or admin key)
        endpoint: Azure Search service endpoint (e.g., https://myservice.search.windows.net)
        index_name: Name of the search index to query
        query: Search query string
        count: Number of results to return
        filter_list: Optional list of domains to filter results

    Returns:
        List of SearchResult objects with link, title, and snippet.
        Documents without a recognisable link field are skipped.

    Raises:
        ImportError: If the azure-search-documents package is not installed.
        Exception: Any error raised while querying the service or converting
            the results is logged and re-raised unchanged.
    """
    try:
        from azure.core.credentials import AzureKeyCredential
        from azure.search.documents import SearchClient
    except ImportError as import_error:
        log.error(
            "azure-search-documents package is not installed. "
            "Install it with: pip install azure-search-documents"
        )
        # Chain the original error so the missing module name is preserved.
        raise ImportError(
            "azure-search-documents is required for Azure AI Search. "
            "Install it with: pip install azure-search-documents"
        ) from import_error

    try:
        search_results = []

        # The client owns an HTTP transport; the context manager closes it
        # even when the query or the result iteration fails.
        with SearchClient(
            endpoint=endpoint,
            index_name=index_name,
            credential=AzureKeyCredential(api_key),
        ) as search_client:
            # The returned pager is lazy, so it must be consumed while the
            # client is still open.
            for result in search_client.search(search_text=query, top=count):
                document = dict(result)

                link = _as_text(_first_truthy(document, _LINK_FIELDS))
                if not link:
                    # Without a link the document cannot be cited; skip it.
                    continue

                title = _as_text(_first_truthy(document, _TITLE_FIELDS))
                snippet = _as_text(_first_truthy(document, _SNIPPET_FIELDS))

                # Truncate snippet if too long
                if snippet and len(snippet) > _MAX_SNIPPET_LENGTH:
                    keep = _MAX_SNIPPET_LENGTH - len(_ELLIPSIS)
                    snippet = snippet[:keep] + _ELLIPSIS

                search_results.append(
                    {
                        "link": link,
                        "title": title,
                        "snippet": snippet,
                    }
                )

        # Apply domain filtering if specified
        if filter_list:
            search_results = get_filtered_results(search_results, filter_list)

        # Convert to SearchResult objects
        return [
            SearchResult(
                link=entry["link"],
                title=entry.get("title"),
                snippet=entry.get("snippet"),
            )
            for entry in search_results
        ]
    except Exception as ex:
        log.error(f"Azure AI Search error: {ex}")
        # Bare raise re-raises the active exception with its traceback intact.
        raise
|