1
0

duckduckgo.py 1.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152
  1. import logging
  2. from typing import Optional
  3. from open_webui.retrieval.web.main import SearchResult, get_filtered_results
  4. from ddgs import DDGS
  5. from ddgs.exceptions import RatelimitException
  6. from open_webui.env import SRC_LOG_LEVELS
  # Module-level logger named after this module; its level is read from the
  # "RAG" entry of SRC_LOG_LEVELS.
  7. log = logging.getLogger(__name__)
  8. log.setLevel(SRC_LOG_LEVELS["RAG"])
  def search_duckduckgo(
      query: str,
      count: int,
      filter_list: Optional[list[str]] = None,
      concurrent_requests: Optional[int] = None,
  ) -> list[SearchResult]:
      """
      Run a DuckDuckGo text search and convert the hits into SearchResult objects.

      If the search raises RatelimitException, the error is logged (not
      re-raised) and processing continues with an empty hit list.

      Args:
          query (str): The query to search for
          count (int): Forwarded to the search call as ``max_results``
          filter_list (Optional[list[str]]): When non-empty, the raw hits are
              passed through ``get_filtered_results`` together with this list
          concurrent_requests (Optional[int]): When truthy, assigned to the
              ``threads`` attribute of the DDGS client before searching

      Returns:
          list[SearchResult]: One SearchResult per remaining hit, built from
              the hit's "href", "title" and "body" entries
      """
      # Starts empty so a rate-limited search still yields a valid (empty) result.
      hits = []
      search_options = {
          "safesearch": "moderate",
          "max_results": count,
          "backend": "lite",
      }

      # The DDGS client is used as a context manager for the duration of the search.
      with DDGS() as client:
          if concurrent_requests:
              client.threads = concurrent_requests

          try:
              hits = client.text(query, **search_options)
          except RatelimitException as exc:
              log.error(f"RatelimitException: {exc}")

      if filter_list:
          hits = get_filtered_results(hits, filter_list)

      # "href" is required on every hit; "title" and "body" may be absent (None).
      converted = []
      for hit in hits:
          converted.append(
              SearchResult(
                  link=hit["href"],
                  title=hit.get("title"),
                  snippet=hit.get("body"),
              )
          )
      return converted