Browse Source

Merge pull request #11728 from open-webui/dev

0.6
Timothy Jaeryang Baek 4 months ago
parent
commit
04799f1f95
100 changed files with 4309 additions and 1171 deletions
  1. 3 3
      .github/pull_request_template.md
  2. 49 0
      CHANGELOG.md
  3. 1 1
      Dockerfile
  4. 121 4
      backend/open_webui/config.py
  5. 33 15
      backend/open_webui/env.py
  6. 3 0
      backend/open_webui/functions.py
  7. 62 9
      backend/open_webui/main.py
  8. 13 6
      backend/open_webui/models/folders.py
  9. 54 2
      backend/open_webui/retrieval/loaders/main.py
  10. 93 0
      backend/open_webui/retrieval/loaders/tavily.py
  11. 155 65
      backend/open_webui/retrieval/utils.py
  12. 20 7
      backend/open_webui/retrieval/vector/dbs/chroma.py
  13. 4 1
      backend/open_webui/retrieval/vector/dbs/milvus.py
  14. 110 65
      backend/open_webui/retrieval/vector/dbs/opensearch.py
  15. 3 1
      backend/open_webui/retrieval/vector/dbs/pgvector.py
  16. 2 1
      backend/open_webui/retrieval/vector/dbs/qdrant.py
  17. 172 76
      backend/open_webui/retrieval/web/utils.py
  18. 3 1
      backend/open_webui/routers/audio.py
  19. 8 3
      backend/open_webui/routers/auths.py
  20. 103 0
      backend/open_webui/routers/chats.py
  21. 16 2
      backend/open_webui/routers/evaluations.py
  22. 169 44
      backend/open_webui/routers/files.py
  23. 15 2
      backend/open_webui/routers/folders.py
  24. 2 4
      backend/open_webui/routers/images.py
  25. 17 7
      backend/open_webui/routers/knowledge.py
  26. 8 4
      backend/open_webui/routers/memories.py
  27. 26 12
      backend/open_webui/routers/ollama.py
  28. 32 13
      backend/open_webui/routers/openai.py
  29. 2 2
      backend/open_webui/routers/pipelines.py
  30. 34 10
      backend/open_webui/routers/retrieval.py
  31. 25 5
      backend/open_webui/routers/users.py
  32. 85 40
      backend/open_webui/socket/main.py
  33. 9 5
      backend/open_webui/socket/utils.py
  34. 2 1
      backend/open_webui/utils/filter.py
  35. 182 64
      backend/open_webui/utils/middleware.py
  36. 1 0
      backend/open_webui/utils/models.py
  37. 8 5
      backend/open_webui/utils/oauth.py
  38. 24 0
      backend/open_webui/utils/payload.py
  39. 10 6
      backend/open_webui/utils/plugin.py
  40. 109 0
      backend/open_webui/utils/redis.py
  41. 0 0
      backend/open_webui/utils/telemetry/__init__.py
  42. 26 0
      backend/open_webui/utils/telemetry/constants.py
  43. 31 0
      backend/open_webui/utils/telemetry/exporters.py
  44. 202 0
      backend/open_webui/utils/telemetry/instrumentors.py
  45. 23 0
      backend/open_webui/utils/telemetry/setup.py
  46. 6 2
      backend/open_webui/utils/tools.py
  47. 16 2
      backend/requirements.txt
  48. 2 1
      backend/start_windows.bat
  49. 305 189
      package-lock.json
  50. 4 2
      package.json
  51. 2 1
      pyproject.toml
  52. 1 1
      src/app.css
  53. 183 0
      src/lib/apis/index.ts
  54. 39 8
      src/lib/components/AddConnectionModal.svelte
  55. 215 0
      src/lib/components/AddServerModal.svelte
  56. 11 11
      src/lib/components/admin/Settings.svelte
  57. 13 1
      src/lib/components/admin/Settings/Connections/OllamaConnection.svelte
  58. 12 1
      src/lib/components/admin/Settings/Connections/OpenAIConnection.svelte
  59. 155 107
      src/lib/components/admin/Settings/Documents.svelte
  60. 11 2
      src/lib/components/admin/Settings/Evaluations/ArenaModelModal.svelte
  61. 8 1
      src/lib/components/admin/Settings/General.svelte
  62. 9 5
      src/lib/components/admin/Settings/Images.svelte
  63. 135 35
      src/lib/components/admin/Settings/Models.svelte
  64. 2 1
      src/lib/components/admin/Settings/Models/ModelList.svelte
  65. 116 0
      src/lib/components/admin/Settings/Models/ModelMenu.svelte
  66. 6 2
      src/lib/components/admin/Settings/WebSearch.svelte
  67. 8 1
      src/lib/components/admin/Users/Groups.svelte
  68. 14 3
      src/lib/components/admin/Users/Groups/EditGroupModal.svelte
  69. 53 1
      src/lib/components/admin/Users/Groups/Permissions.svelte
  70. 15 1
      src/lib/components/admin/Users/UserList/UserChatsModal.svelte
  71. 7 4
      src/lib/components/channel/Messages.svelte
  72. 37 24
      src/lib/components/chat/Chat.svelte
  73. 1 1
      src/lib/components/chat/ContentRenderer/FloatingButtons.svelte
  74. 36 40
      src/lib/components/chat/Controls/Controls.svelte
  75. 57 58
      src/lib/components/chat/MessageInput.svelte
  76. 1 1
      src/lib/components/chat/MessageInput/Commands/Knowledge.svelte
  77. 16 2
      src/lib/components/chat/MessageInput/Commands/Prompts.svelte
  78. 2 2
      src/lib/components/chat/MessageInput/InputMenu.svelte
  79. 42 0
      src/lib/components/chat/Messages.svelte
  80. 6 6
      src/lib/components/chat/Messages/Citations.svelte
  81. 3 3
      src/lib/components/chat/Messages/CitationsModal.svelte
  82. 4 2
      src/lib/components/chat/Messages/CodeBlock.svelte
  83. 6 1
      src/lib/components/chat/Messages/ContentRenderer.svelte
  84. 1 1
      src/lib/components/chat/Messages/Markdown.svelte
  85. 108 0
      src/lib/components/chat/Messages/Markdown/AlertRenderer.svelte
  86. 1 1
      src/lib/components/chat/Messages/Markdown/MarkdownInlineTokens.svelte
  87. 15 5
      src/lib/components/chat/Messages/Markdown/MarkdownTokens.svelte
  88. 3 0
      src/lib/components/chat/Messages/Message.svelte
  89. 30 0
      src/lib/components/chat/Messages/MultiResponseMessages.svelte
  90. 54 10
      src/lib/components/chat/Messages/ResponseMessage.svelte
  91. 96 11
      src/lib/components/chat/Messages/UserMessage.svelte
  92. 2 1
      src/lib/components/chat/ModelSelector.svelte
  93. 148 42
      src/lib/components/chat/ModelSelector/Selector.svelte
  94. 13 29
      src/lib/components/chat/Navbar.svelte
  95. 3 0
      src/lib/components/chat/Placeholder.svelte
  96. 15 13
      src/lib/components/chat/Settings/Account.svelte
  97. 1 0
      src/lib/components/chat/Settings/Advanced/AdvancedParams.svelte
  98. 13 1
      src/lib/components/chat/Settings/Connections/Connection.svelte
  99. 87 48
      src/lib/components/chat/Settings/General.svelte
  100. 90 1
      src/lib/components/chat/Settings/Interface.svelte

+ 3 - 3
.github/pull_request_template.md

@@ -9,9 +9,9 @@
 - [ ] **Changelog:** Ensure a changelog entry following the format of [Keep a Changelog](https://keepachangelog.com/) is added at the bottom of the PR description.
 - [ ] **Documentation:** Have you updated relevant documentation [Open WebUI Docs](https://github.com/open-webui/docs), or other documentation sources?
 - [ ] **Dependencies:** Are there any new dependencies? Have you updated the dependency versions in the documentation?
-- [ ] **Testing:** Have you written and run sufficient tests for validating the changes?
+- [ ] **Testing:** Have you written and run sufficient tests to validate the changes?
 - [ ] **Code review:** Have you performed a self-review of your code, addressing any coding standard issues and ensuring adherence to the project's coding standards?
-- [ ] **Prefix:** To cleary categorize this pull request, prefix the pull request title, using one of the following:
+- [ ] **Prefix:** To clearly categorize this pull request, prefix the pull request title using one of the following:
   - **BREAKING CHANGE**: Significant changes that may affect compatibility
   - **build**: Changes that affect the build system or external dependencies
   - **ci**: Changes to our continuous integration processes or workflows
@@ -22,7 +22,7 @@
   - **i18n**: Internationalization or localization changes
   - **perf**: Performance improvement
   - **refactor**: Code restructuring for better maintainability, readability, or scalability
-  - **style**: Changes that do not affect the meaning of the code (white-space, formatting, missing semi-colons, etc.)
+  - **style**: Changes that do not affect the meaning of the code (white space, formatting, missing semi-colons, etc.)
   - **test**: Adding missing tests or correcting existing tests
   - **WIP**: Work in progress, a temporary label for incomplete or ongoing work
 

+ 49 - 0
CHANGELOG.md

@@ -5,6 +5,55 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.6.0] - 2025-03-31
+
+### Added
+
+- 🧩 **External Tool Server Support via OpenAPI**: Connect Open WebUI to any OpenAPI-compatible REST server instantly—offering immediate integration with thousands of developer tools, SDKs, and SaaS systems for powerful extensibility. Learn more: https://github.com/open-webui/openapi-servers
+- 🛠️ **MCP Server Support via MCPO**: You can now convert and expose your internal MCP tools as interoperable OpenAPI HTTP servers within Open WebUI for seamless, plug-n-play AI toolchain creation. Learn more: https://github.com/open-webui/mcpo
+- 📨 **/messages Chat API Endpoint Support**: For power users building external AI systems, new endpoints allow precise control of messages asynchronously—feed long-running external responses into Open WebUI chats without coupling with the frontend.
+- 📝 **Client-Side PDF Generation**: PDF exports are now generated fully client-side for drastically improved output quality—perfect for saving conversations or documents.
+- 💼 **Enforced Temporary Chats Mode**: Admins can now enforce temporary chat sessions by default to align with stringent data retention and compliance requirements.
+- 🌍 **Public Resource Sharing Permission Controls**: Fine-grained user group permissions now allow enabling/disabling public sharing for models, knowledge, prompts, and tools—ideal for privacy, team control, and internal deployments.
+- 📦 **Custom pip Options for Tools/Functions**: You can now specify custom pip installation options with "PIP_OPTIONS", "PIP_PACKAGE_INDEX_OPTIONS" environment variables—improving compatibility, support for private indexes, and better control over Python environments.
+- 🔢 **Editable Message Counter**: You can now double-click the message count number and jump straight to editing the index—quickly navigate complex chats or regenerate specific messages precisely.
+- 🧠 **Embedding Prefix Support Added**: Add custom prefixes to your embeddings for instruct-style tokens, enabling stronger model alignment and more consistent RAG performance.
+- 🙈 **Ability to Hide Base Models**: Optionally hide base models from the UI, helping users streamline model visibility and limit access to only usable endpoints.
+- 📚 **Docling Content Extraction Support**: Open WebUI now supports Docling as a content extraction engine, enabling smarter and more accurate parsing of complex file formats—ideal for advanced document understanding and Retrieval-Augmented Generation (RAG) workflows.
+- 🗃️ **Redis Sentinel Support Added**: Enhance deployment redundancy with support for Redis Sentinel for highly available, failover-safe Redis-based caching or pub/sub.
+- 📚 **JSON Schema Format for Ollama**: Added support for defining the format using JSON schema in Ollama-compatible models, improving flexibility and validation of model outputs.
+- 🔍 **Chat Sidebar Search "Clear" Button**: Quickly clear search filters in the chat sidebar using the new ✖️ button—streamline your chat navigation with one click.
+- 🗂️ **Auto-Focus + Enter Submit for Folder Name**: When creating a new folder, the system automatically enters rename mode with name preselected—simplifying your org workflow.
+- 🧱 **Markdown Alerts Rendering**: Blockquotes with syntax hinting (e.g. ⚠️, ℹ️, ✅) now render styled Markdown alert banners, making messages and documentation more visually structured.
+- 🔁 **Hybrid Search Runs in Parallel Now**: Hybrid (BM25 + embedding) search components now run in parallel—dramatically reducing response times and speeding up document retrieval.
+- 📋 **Cleaner UI for Tool Call Display**: Optimized the visual layout of called tools inside chat messages for better clarity and reduced visual clutter.
+- 🧪 **Playwright Timeout Now Configurable**: Default timeout for Playwright processes is now shorter and adjustable via environment variables—making web scraping more robust and tunable to environments.
+- 📈 **OpenTelemetry Support for Observability**: Open WebUI now integrates with OpenTelemetry, allowing you to connect with tools like Grafana, Jaeger, or Prometheus for detailed performance insights and real-time visibility—entirely opt-in and fully self-hosted. Even if enabled, no data is ever sent to us, ensuring your privacy and ownership over all telemetry data.
+- 🛠 **General UI Enhancements & UX Polish**: Numerous refinements across sidebar, code blocks, modal interactions, button alignment, scrollbar visibility, and folder behavior improve overall fluidity and usability of the interface.
+- 🧱 **General Backend Refactoring**: Numerous backend components have been refactored to improve stability, maintainability, and performance—ensuring a more consistent and reliable system across all features.
+- 🌍 **Internationalization Language Support Updates**: Added Estonian and Galician languages, improved Spanish (fully revised), Traditional Chinese, Simplified Chinese, Turkish, Catalan, Ukrainian, and German for a more localized and inclusive interface.
+
+### Fixed
+
+- 🧑‍💻 **Firefox Input Height Bug**: Text input in Firefox now maintains proper height, ensuring message boxes look consistent and behave predictably.
+- 🧾 **Tika Blank Line Bug**: PDFs processed with Apache Tika 3.1.0.0 no longer introduce excessive blank lines—improving RAG output quality and visual cleanliness.
+- 🧪 **CSV Loader Encoding Issues**: CSV files with unknown encodings now automatically detect character sets, resolving import errors in non-UTF-8 datasets.
+- ✅ **LDAP Auth Config Fix**: Path to certificate file is now optional for LDAP setups, fixing authentication trouble for users without preconfigured cert paths.
+- 📥 **File Deletion in Bypass Mode**: Resolved issue where files couldn’t be deleted from knowledge when “bypass embedding” mode was enabled.
+- 🧩 **Hybrid Search Result Sorting & Deduplication Fixed**: Fixed citation and sorting issues in RAG hybrid and reranker modes, ensuring retrieved documents are shown in correct order per score.
+- 🧷 **Model Export/Import Broken for a Single Model**: Fixed bug where individual models couldn’t be exported or re-imported, restoring full portability.
+- 📫 **Auth Redirect Fix**: Logged-in users are now routed properly without unnecessary login prompts when already authenticated.
+
+### Changed
+
+- 🧠 **Prompt Autocompletion Disabled By Default**: Autocomplete suggestions while typing are now disabled unless explicitly re-enabled in user preferences—reduces distractions while composing prompts for advanced users.
+- 🧾 **Normalize Citation Numbering**: Source citations now properly begin from "1" instead of "0"—improving consistency and professional presentation in AI outputs.
+- 📚 **Improved Error Handling from Pipelines**: Pipelines now show the actual returned error message from failed tasks rather than a generic "Connection closed"—making debugging far more user-friendly.
+
+### Removed
+
+- 🧾 **ENABLE_AUDIT_LOGS Setting Removed**: Deprecated setting “ENABLE_AUDIT_LOGS” has been fully removed—now controlled via “AUDIT_LOG_LEVEL” instead.
+
 ## [0.5.20] - 2025-03-05
 
 ### Added

+ 1 - 1
Dockerfile

@@ -132,7 +132,7 @@ RUN if [ "$USE_OLLAMA" = "true" ]; then \
 # install python dependencies
 COPY --chown=$UID:$GID ./backend/requirements.txt ./requirements.txt
 
-RUN pip3 install uv && \
+RUN pip3 install --no-cache-dir uv && \
     if [ "$USE_CUDA" = "true" ]; then \
     # If you use CUDA the whisper and embedding model will be downloaded on first use
     pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/$USE_CUDA_DOCKER_VER --no-cache-dir && \

+ 121 - 4
backend/open_webui/config.py

@@ -3,6 +3,7 @@ import logging
 import os
 import shutil
 import base64
+import redis
 
 from datetime import datetime
 from pathlib import Path
@@ -17,6 +18,9 @@ from open_webui.env import (
     DATA_DIR,
     DATABASE_URL,
     ENV,
+    REDIS_URL,
+    REDIS_SENTINEL_HOSTS,
+    REDIS_SENTINEL_PORT,
     FRONTEND_BUILD_DIR,
     OFFLINE_MODE,
     OPEN_WEBUI_DIR,
@@ -26,6 +30,7 @@ from open_webui.env import (
     log,
 )
 from open_webui.internal.db import Base, get_db
+from open_webui.utils.redis import get_redis_connection
 
 
 class EndpointFilter(logging.Filter):
@@ -248,9 +253,17 @@ class PersistentConfig(Generic[T]):
 
 class AppConfig:
     _state: dict[str, PersistentConfig]
+    _redis: Optional[redis.Redis] = None
 
-    def __init__(self):
+    def __init__(
+        self, redis_url: Optional[str] = None, redis_sentinels: Optional[list] = []
+    ):
         super().__setattr__("_state", {})
+        if redis_url:
+            super().__setattr__(
+                "_redis",
+                get_redis_connection(redis_url, redis_sentinels, decode_responses=True),
+            )
 
     def __setattr__(self, key, value):
         if isinstance(value, PersistentConfig):
@@ -259,7 +272,31 @@ class AppConfig:
             self._state[key].value = value
             self._state[key].save()
 
+            if self._redis:
+                redis_key = f"open-webui:config:{key}"
+                self._redis.set(redis_key, json.dumps(self._state[key].value))
+
     def __getattr__(self, key):
+        if key not in self._state:
+            raise AttributeError(f"Config key '{key}' not found")
+
+        # If Redis is available, check for an updated value
+        if self._redis:
+            redis_key = f"open-webui:config:{key}"
+            redis_value = self._redis.get(redis_key)
+
+            if redis_value is not None:
+                try:
+                    decoded_value = json.loads(redis_value)
+
+                    # Update the in-memory value if different
+                    if self._state[key].value != decoded_value:
+                        self._state[key].value = decoded_value
+                        log.info(f"Updated {key} from Redis: {decoded_value}")
+
+                except json.JSONDecodeError:
+                    log.error(f"Invalid JSON format in Redis for {key}: {redis_value}")
+
         return self._state[key].value
 
 
@@ -943,6 +980,35 @@ USER_PERMISSIONS_WORKSPACE_TOOLS_ACCESS = (
     os.environ.get("USER_PERMISSIONS_WORKSPACE_TOOLS_ACCESS", "False").lower() == "true"
 )
 
+USER_PERMISSIONS_WORKSPACE_MODELS_ALLOW_PUBLIC_SHARING = (
+    os.environ.get(
+        "USER_PERMISSIONS_WORKSPACE_MODELS_ALLOW_PUBLIC_SHARING", "False"
+    ).lower()
+    == "true"
+)
+
+USER_PERMISSIONS_WORKSPACE_KNOWLEDGE_ALLOW_PUBLIC_SHARING = (
+    os.environ.get(
+        "USER_PERMISSIONS_WORKSPACE_KNOWLEDGE_ALLOW_PUBLIC_SHARING", "False"
+    ).lower()
+    == "true"
+)
+
+USER_PERMISSIONS_WORKSPACE_PROMPTS_ALLOW_PUBLIC_SHARING = (
+    os.environ.get(
+        "USER_PERMISSIONS_WORKSPACE_PROMPTS_ALLOW_PUBLIC_SHARING", "False"
+    ).lower()
+    == "true"
+)
+
+USER_PERMISSIONS_WORKSPACE_TOOLS_ALLOW_PUBLIC_SHARING = (
+    os.environ.get(
+        "USER_PERMISSIONS_WORKSPACE_TOOLS_ALLOW_PUBLIC_SHARING", "False"
+    ).lower()
+    == "true"
+)
+
+
 USER_PERMISSIONS_CHAT_CONTROLS = (
     os.environ.get("USER_PERMISSIONS_CHAT_CONTROLS", "True").lower() == "true"
 )
@@ -963,6 +1029,11 @@ USER_PERMISSIONS_CHAT_TEMPORARY = (
     os.environ.get("USER_PERMISSIONS_CHAT_TEMPORARY", "True").lower() == "true"
 )
 
+USER_PERMISSIONS_CHAT_TEMPORARY_ENFORCED = (
+    os.environ.get("USER_PERMISSIONS_CHAT_TEMPORARY_ENFORCED", "False").lower()
+    == "true"
+)
+
 USER_PERMISSIONS_FEATURES_WEB_SEARCH = (
     os.environ.get("USER_PERMISSIONS_FEATURES_WEB_SEARCH", "True").lower() == "true"
 )
@@ -985,12 +1056,19 @@ DEFAULT_USER_PERMISSIONS = {
         "prompts": USER_PERMISSIONS_WORKSPACE_PROMPTS_ACCESS,
         "tools": USER_PERMISSIONS_WORKSPACE_TOOLS_ACCESS,
     },
+    "sharing": {
+        "public_models": USER_PERMISSIONS_WORKSPACE_MODELS_ALLOW_PUBLIC_SHARING,
+        "public_knowledge": USER_PERMISSIONS_WORKSPACE_KNOWLEDGE_ALLOW_PUBLIC_SHARING,
+        "public_prompts": USER_PERMISSIONS_WORKSPACE_PROMPTS_ALLOW_PUBLIC_SHARING,
+        "public_tools": USER_PERMISSIONS_WORKSPACE_TOOLS_ALLOW_PUBLIC_SHARING,
+    },
     "chat": {
         "controls": USER_PERMISSIONS_CHAT_CONTROLS,
         "file_upload": USER_PERMISSIONS_CHAT_FILE_UPLOAD,
         "delete": USER_PERMISSIONS_CHAT_DELETE,
         "edit": USER_PERMISSIONS_CHAT_EDIT,
         "temporary": USER_PERMISSIONS_CHAT_TEMPORARY,
+        "temporary_enforced": USER_PERMISSIONS_CHAT_TEMPORARY_ENFORCED,
     },
     "features": {
         "web_search": USER_PERMISSIONS_FEATURES_WEB_SEARCH,
@@ -1055,6 +1133,12 @@ ENABLE_MESSAGE_RATING = PersistentConfig(
     os.environ.get("ENABLE_MESSAGE_RATING", "True").lower() == "true",
 )
 
+ENABLE_USER_WEBHOOKS = PersistentConfig(
+    "ENABLE_USER_WEBHOOKS",
+    "ui.enable_user_webhooks",
+    os.environ.get("ENABLE_USER_WEBHOOKS", "True").lower() == "true",
+)
+
 
 def validate_cors_origins(origins):
     for origin in origins:
@@ -1276,7 +1360,7 @@ Strictly return in JSON format:
 ENABLE_AUTOCOMPLETE_GENERATION = PersistentConfig(
     "ENABLE_AUTOCOMPLETE_GENERATION",
     "task.autocomplete.enable",
-    os.environ.get("ENABLE_AUTOCOMPLETE_GENERATION", "True").lower() == "true",
+    os.environ.get("ENABLE_AUTOCOMPLETE_GENERATION", "False").lower() == "true",
 )
 
 AUTOCOMPLETE_GENERATION_INPUT_MAX_LENGTH = PersistentConfig(
@@ -1548,8 +1632,10 @@ QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY", None)
 
 # OpenSearch
 OPENSEARCH_URI = os.environ.get("OPENSEARCH_URI", "https://localhost:9200")
-OPENSEARCH_SSL = os.environ.get("OPENSEARCH_SSL", True)
-OPENSEARCH_CERT_VERIFY = os.environ.get("OPENSEARCH_CERT_VERIFY", False)
+OPENSEARCH_SSL = os.environ.get("OPENSEARCH_SSL", "true").lower() == "true"
+OPENSEARCH_CERT_VERIFY = (
+    os.environ.get("OPENSEARCH_CERT_VERIFY", "false").lower() == "true"
+)
 OPENSEARCH_USERNAME = os.environ.get("OPENSEARCH_USERNAME", None)
 OPENSEARCH_PASSWORD = os.environ.get("OPENSEARCH_PASSWORD", None)
 
@@ -1623,6 +1709,12 @@ TIKA_SERVER_URL = PersistentConfig(
     os.getenv("TIKA_SERVER_URL", "http://tika:9998"),  # Default for sidecar deployment
 )
 
+DOCLING_SERVER_URL = PersistentConfig(
+    "DOCLING_SERVER_URL",
+    "rag.docling_server_url",
+    os.getenv("DOCLING_SERVER_URL", "http://docling:5001"),
+)
+
 DOCUMENT_INTELLIGENCE_ENDPOINT = PersistentConfig(
     "DOCUMENT_INTELLIGENCE_ENDPOINT",
     "rag.document_intelligence_endpoint",
@@ -1646,6 +1738,11 @@ BYPASS_EMBEDDING_AND_RETRIEVAL = PersistentConfig(
 RAG_TOP_K = PersistentConfig(
     "RAG_TOP_K", "rag.top_k", int(os.environ.get("RAG_TOP_K", "3"))
 )
+RAG_TOP_K_RERANKER = PersistentConfig(
+    "RAG_TOP_K_RERANKER",
+    "rag.top_k_reranker",
+    int(os.environ.get("RAG_TOP_K_RERANKER", "3")),
+)
 RAG_RELEVANCE_THRESHOLD = PersistentConfig(
     "RAG_RELEVANCE_THRESHOLD",
     "rag.relevance_threshold",
@@ -1727,6 +1824,14 @@ RAG_EMBEDDING_BATCH_SIZE = PersistentConfig(
     ),
 )
 
+RAG_EMBEDDING_QUERY_PREFIX = os.environ.get("RAG_EMBEDDING_QUERY_PREFIX", None)
+
+RAG_EMBEDDING_CONTENT_PREFIX = os.environ.get("RAG_EMBEDDING_CONTENT_PREFIX", None)
+
+RAG_EMBEDDING_PREFIX_FIELD_NAME = os.environ.get(
+    "RAG_EMBEDDING_PREFIX_FIELD_NAME", None
+)
+
 RAG_RERANKING_MODEL = PersistentConfig(
     "RAG_RERANKING_MODEL",
     "rag.reranking_model",
@@ -1950,6 +2055,12 @@ TAVILY_API_KEY = PersistentConfig(
     os.getenv("TAVILY_API_KEY", ""),
 )
 
+TAVILY_EXTRACT_DEPTH = PersistentConfig(
+    "TAVILY_EXTRACT_DEPTH",
+    "rag.web.search.tavily_extract_depth",
+    os.getenv("TAVILY_EXTRACT_DEPTH", "basic"),
+)
+
 JINA_API_KEY = PersistentConfig(
     "JINA_API_KEY",
     "rag.web.search.jina_api_key",
@@ -2036,6 +2147,12 @@ PLAYWRIGHT_WS_URI = PersistentConfig(
     os.environ.get("PLAYWRIGHT_WS_URI", None),
 )
 
+PLAYWRIGHT_TIMEOUT = PersistentConfig(
+    "PLAYWRIGHT_TIMEOUT",
+    "rag.web.loader.engine.playwright.timeout",
+    int(os.environ.get("PLAYWRIGHT_TIMEOUT", "10")),
+)
+
 FIRECRAWL_API_KEY = PersistentConfig(
     "FIRECRAWL_API_KEY",
     "firecrawl.api_key",

+ 33 - 15
backend/open_webui/env.py

@@ -105,7 +105,6 @@ for source in log_sources:
 
 log.setLevel(SRC_LOG_LEVELS["CONFIG"])
 
-
 WEBUI_NAME = os.environ.get("WEBUI_NAME", "Open WebUI")
 if WEBUI_NAME != "Open WebUI":
     WEBUI_NAME += " (Open WebUI)"
@@ -130,7 +129,6 @@ else:
     except Exception:
         PACKAGE_DATA = {"version": "0.0.0"}
 
-
 VERSION = PACKAGE_DATA["version"]
 
 
@@ -161,7 +159,6 @@ try:
 except Exception:
     changelog_content = (pkgutil.get_data("open_webui", "CHANGELOG.md") or b"").decode()
 
-
 # Convert markdown content to HTML
 html_content = markdown.markdown(changelog_content)
 
@@ -192,7 +189,6 @@ for version in soup.find_all("h2"):
 
     changelog_json[version_number] = version_data
 
-
 CHANGELOG = changelog_json
 
 ####################################
@@ -209,7 +205,6 @@ ENABLE_FORWARD_USER_INFO_HEADERS = (
     os.environ.get("ENABLE_FORWARD_USER_INFO_HEADERS", "False").lower() == "true"
 )
 
-
 ####################################
 # WEBUI_BUILD_HASH
 ####################################
@@ -244,7 +239,6 @@ if FROM_INIT_PY:
 
     DATA_DIR = Path(os.getenv("DATA_DIR", OPEN_WEBUI_DIR / "data"))
 
-
 STATIC_DIR = Path(os.getenv("STATIC_DIR", OPEN_WEBUI_DIR / "static"))
 
 FONTS_DIR = Path(os.getenv("FONTS_DIR", OPEN_WEBUI_DIR / "static" / "fonts"))
@@ -256,7 +250,6 @@ if FROM_INIT_PY:
         os.getenv("FRONTEND_BUILD_DIR", OPEN_WEBUI_DIR / "frontend")
     ).resolve()
 
-
 ####################################
 # Database
 ####################################
@@ -321,7 +314,6 @@ RESET_CONFIG_ON_START = (
     os.environ.get("RESET_CONFIG_ON_START", "False").lower() == "true"
 )
 
-
 ENABLE_REALTIME_CHAT_SAVE = (
     os.environ.get("ENABLE_REALTIME_CHAT_SAVE", "False").lower() == "true"
 )
@@ -330,7 +322,9 @@ ENABLE_REALTIME_CHAT_SAVE = (
 # REDIS
 ####################################
 
-REDIS_URL = os.environ.get("REDIS_URL", "redis://localhost:6379/0")
+REDIS_URL = os.environ.get("REDIS_URL", "")
+REDIS_SENTINEL_HOSTS = os.environ.get("REDIS_SENTINEL_HOSTS", "")
+REDIS_SENTINEL_PORT = os.environ.get("REDIS_SENTINEL_PORT", "26379")
 
 ####################################
 # WEBUI_AUTH (Required for security)
@@ -387,6 +381,10 @@ WEBSOCKET_MANAGER = os.environ.get("WEBSOCKET_MANAGER", "")
 WEBSOCKET_REDIS_URL = os.environ.get("WEBSOCKET_REDIS_URL", REDIS_URL)
 WEBSOCKET_REDIS_LOCK_TIMEOUT = os.environ.get("WEBSOCKET_REDIS_LOCK_TIMEOUT", 60)
 
+WEBSOCKET_SENTINEL_HOSTS = os.environ.get("WEBSOCKET_SENTINEL_HOSTS", "")
+
+WEBSOCKET_SENTINEL_PORT = os.environ.get("WEBSOCKET_SENTINEL_PORT", "26379")
+
 AIOHTTP_CLIENT_TIMEOUT = os.environ.get("AIOHTTP_CLIENT_TIMEOUT", "")
 
 if AIOHTTP_CLIENT_TIMEOUT == "":
@@ -399,18 +397,16 @@ else:
 
 AIOHTTP_CLIENT_TIMEOUT_MODEL_LIST = os.environ.get(
     "AIOHTTP_CLIENT_TIMEOUT_MODEL_LIST",
-    os.environ.get("AIOHTTP_CLIENT_TIMEOUT_OPENAI_MODEL_LIST", ""),
+    os.environ.get("AIOHTTP_CLIENT_TIMEOUT_OPENAI_MODEL_LIST", "10"),
 )
 
-
 if AIOHTTP_CLIENT_TIMEOUT_MODEL_LIST == "":
     AIOHTTP_CLIENT_TIMEOUT_MODEL_LIST = None
 else:
     try:
         AIOHTTP_CLIENT_TIMEOUT_MODEL_LIST = int(AIOHTTP_CLIENT_TIMEOUT_MODEL_LIST)
     except Exception:
-        AIOHTTP_CLIENT_TIMEOUT_MODEL_LIST = 5
-
+        AIOHTTP_CLIENT_TIMEOUT_MODEL_LIST = 10
 
 ####################################
 # OFFLINE_MODE
@@ -424,13 +420,12 @@ if OFFLINE_MODE:
 ####################################
 # AUDIT LOGGING
 ####################################
-ENABLE_AUDIT_LOGS = os.getenv("ENABLE_AUDIT_LOGS", "false").lower() == "true"
 # Where to store log file
 AUDIT_LOGS_FILE_PATH = f"{DATA_DIR}/audit.log"
 # Maximum size of a file before rotating into a new log file
 AUDIT_LOG_FILE_ROTATION_SIZE = os.getenv("AUDIT_LOG_FILE_ROTATION_SIZE", "10MB")
 # METADATA | REQUEST | REQUEST_RESPONSE
-AUDIT_LOG_LEVEL = os.getenv("AUDIT_LOG_LEVEL", "REQUEST_RESPONSE").upper()
+AUDIT_LOG_LEVEL = os.getenv("AUDIT_LOG_LEVEL", "NONE").upper()
 try:
     MAX_BODY_LOG_SIZE = int(os.environ.get("MAX_BODY_LOG_SIZE") or 2048)
 except ValueError:
@@ -442,3 +437,26 @@ AUDIT_EXCLUDED_PATHS = os.getenv("AUDIT_EXCLUDED_PATHS", "/chats,/chat,/folders"
 )
 AUDIT_EXCLUDED_PATHS = [path.strip() for path in AUDIT_EXCLUDED_PATHS]
 AUDIT_EXCLUDED_PATHS = [path.lstrip("/") for path in AUDIT_EXCLUDED_PATHS]
+
+####################################
+# OPENTELEMETRY
+####################################
+
+ENABLE_OTEL = os.environ.get("ENABLE_OTEL", "False").lower() == "true"
+OTEL_EXPORTER_OTLP_ENDPOINT = os.environ.get(
+    "OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4317"
+)
+OTEL_SERVICE_NAME = os.environ.get("OTEL_SERVICE_NAME", "open-webui")
+OTEL_RESOURCE_ATTRIBUTES = os.environ.get(
+    "OTEL_RESOURCE_ATTRIBUTES", ""
+)  # e.g. key1=val1,key2=val2
+OTEL_TRACES_SAMPLER = os.environ.get(
+    "OTEL_TRACES_SAMPLER", "parentbased_always_on"
+).lower()
+
+####################################
+# TOOLS/FUNCTIONS PIP OPTIONS
+####################################
+
+PIP_OPTIONS = os.getenv("PIP_OPTIONS", "").split()
+PIP_PACKAGE_INDEX_OPTIONS = os.getenv("PIP_PACKAGE_INDEX_OPTIONS", "").split()

+ 3 - 0
backend/open_webui/functions.py

@@ -223,6 +223,9 @@ async def generate_function_chat_completion(
     extra_params = {
         "__event_emitter__": __event_emitter__,
         "__event_call__": __event_call__,
+        "__chat_id__": metadata.get("chat_id", None),
+        "__session_id__": metadata.get("session_id", None),
+        "__message_id__": metadata.get("message_id", None),
         "__task__": __task__,
         "__task_body__": __task_body__,
         "__files__": files,

+ 62 - 9
backend/open_webui/main.py

@@ -84,11 +84,12 @@ from open_webui.routers.retrieval import (
     get_rf,
 )
 
-from open_webui.internal.db import Session
+from open_webui.internal.db import Session, engine
 
 from open_webui.models.functions import Functions
 from open_webui.models.models import Models
 from open_webui.models.users import UserModel, Users
+from open_webui.models.chats import Chats
 
 from open_webui.config import (
     LICENSE_KEY,
@@ -155,6 +156,7 @@ from open_webui.config import (
     AUDIO_TTS_AZURE_SPEECH_REGION,
     AUDIO_TTS_AZURE_SPEECH_OUTPUT_FORMAT,
     PLAYWRIGHT_WS_URI,
+    PLAYWRIGHT_TIMEOUT,
     FIRECRAWL_API_BASE_URL,
     FIRECRAWL_API_KEY,
     RAG_WEB_LOADER_ENGINE,
@@ -186,9 +188,11 @@ from open_webui.config import (
     CHUNK_SIZE,
     CONTENT_EXTRACTION_ENGINE,
     TIKA_SERVER_URL,
+    DOCLING_SERVER_URL,
     DOCUMENT_INTELLIGENCE_ENDPOINT,
     DOCUMENT_INTELLIGENCE_KEY,
     RAG_TOP_K,
+    RAG_TOP_K_RERANKER,
     RAG_TEXT_SPLITTER,
     TIKTOKEN_ENCODING_NAME,
     PDF_EXTRACT_IMAGES,
@@ -212,6 +216,7 @@ from open_webui.config import (
     SERPSTACK_API_KEY,
     SERPSTACK_HTTPS,
     TAVILY_API_KEY,
+    TAVILY_EXTRACT_DEPTH,
     BING_SEARCH_V7_ENDPOINT,
     BING_SEARCH_V7_SUBSCRIPTION_KEY,
     BRAVE_SEARCH_API_KEY,
@@ -248,6 +253,7 @@ from open_webui.config import (
     ENABLE_CHANNELS,
     ENABLE_COMMUNITY_SHARING,
     ENABLE_MESSAGE_RATING,
+    ENABLE_USER_WEBHOOKS,
     ENABLE_EVALUATION_ARENA_MODELS,
     USER_PERMISSIONS,
     DEFAULT_USER_ROLE,
@@ -312,6 +318,9 @@ from open_webui.env import (
     AUDIT_EXCLUDED_PATHS,
     AUDIT_LOG_LEVEL,
     CHANGELOG,
+    REDIS_URL,
+    REDIS_SENTINEL_HOSTS,
+    REDIS_SENTINEL_PORT,
     GLOBAL_LOG_LEVEL,
     MAX_BODY_LOG_SIZE,
     SAFE_MODE,
@@ -327,6 +336,7 @@ from open_webui.env import (
     BYPASS_MODEL_ACCESS_CONTROL,
     RESET_CONFIG_ON_START,
     OFFLINE_MODE,
+    ENABLE_OTEL,
 )
 
 
@@ -354,6 +364,8 @@ from open_webui.utils.security_headers import SecurityHeadersMiddleware
 
 from open_webui.tasks import stop_task, list_tasks  # Import from tasks.py
 
+from open_webui.utils.redis import get_sentinels_from_env
+
 
 if SAFE_MODE:
     print("SAFE MODE ENABLED")
@@ -418,11 +430,27 @@ app = FastAPI(
 
 oauth_manager = OAuthManager(app)
 
-app.state.config = AppConfig()
+app.state.config = AppConfig(
+    redis_url=REDIS_URL,
+    redis_sentinels=get_sentinels_from_env(REDIS_SENTINEL_HOSTS, REDIS_SENTINEL_PORT),
+)
 
 app.state.WEBUI_NAME = WEBUI_NAME
 app.state.LICENSE_METADATA = None
 
+
+########################################
+#
+# OPENTELEMETRY
+#
+########################################
+
+if ENABLE_OTEL:
+    from open_webui.utils.telemetry.setup import setup as setup_opentelemetry
+
+    setup_opentelemetry(app=app, db_engine=engine)
+
+
 ########################################
 #
 # OLLAMA
@@ -492,6 +520,7 @@ app.state.config.MODEL_ORDER_LIST = MODEL_ORDER_LIST
 app.state.config.ENABLE_CHANNELS = ENABLE_CHANNELS
 app.state.config.ENABLE_COMMUNITY_SHARING = ENABLE_COMMUNITY_SHARING
 app.state.config.ENABLE_MESSAGE_RATING = ENABLE_MESSAGE_RATING
+app.state.config.ENABLE_USER_WEBHOOKS = ENABLE_USER_WEBHOOKS
 
 app.state.config.ENABLE_EVALUATION_ARENA_MODELS = ENABLE_EVALUATION_ARENA_MODELS
 app.state.config.EVALUATION_ARENA_MODELS = EVALUATION_ARENA_MODELS
@@ -535,6 +564,7 @@ app.state.FUNCTIONS = {}
 
 
 app.state.config.TOP_K = RAG_TOP_K
+app.state.config.TOP_K_RERANKER = RAG_TOP_K_RERANKER
 app.state.config.RELEVANCE_THRESHOLD = RAG_RELEVANCE_THRESHOLD
 app.state.config.FILE_MAX_SIZE = RAG_FILE_MAX_SIZE
 app.state.config.FILE_MAX_COUNT = RAG_FILE_MAX_COUNT
@@ -549,6 +579,7 @@ app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION = (
 
 app.state.config.CONTENT_EXTRACTION_ENGINE = CONTENT_EXTRACTION_ENGINE
 app.state.config.TIKA_SERVER_URL = TIKA_SERVER_URL
+app.state.config.DOCLING_SERVER_URL = DOCLING_SERVER_URL
 app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT = DOCUMENT_INTELLIGENCE_ENDPOINT
 app.state.config.DOCUMENT_INTELLIGENCE_KEY = DOCUMENT_INTELLIGENCE_KEY
 
@@ -612,8 +643,10 @@ app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = RAG_WEB_SEARCH_CONCURRENT_
 app.state.config.RAG_WEB_LOADER_ENGINE = RAG_WEB_LOADER_ENGINE
 app.state.config.RAG_WEB_SEARCH_TRUST_ENV = RAG_WEB_SEARCH_TRUST_ENV
 app.state.config.PLAYWRIGHT_WS_URI = PLAYWRIGHT_WS_URI
+app.state.config.PLAYWRIGHT_TIMEOUT = PLAYWRIGHT_TIMEOUT
 app.state.config.FIRECRAWL_API_BASE_URL = FIRECRAWL_API_BASE_URL
 app.state.config.FIRECRAWL_API_KEY = FIRECRAWL_API_KEY
+app.state.config.TAVILY_EXTRACT_DEPTH = TAVILY_EXTRACT_DEPTH
 
 app.state.EMBEDDING_FUNCTION = None
 app.state.ef = None
@@ -947,14 +980,24 @@ async def get_models(request: Request, user=Depends(get_verified_user)):
 
         return filtered_models
 
-    models = await get_all_models(request, user=user)
+    all_models = await get_all_models(request, user=user)
+
+    models = []
+    for model in all_models:
+        # Filter out filter pipelines
+        if "pipeline" in model and model["pipeline"].get("type", None) == "filter":
+            continue
+
+        model_tags = [
+            tag.get("name")
+            for tag in model.get("info", {}).get("meta", {}).get("tags", [])
+        ]
+        tags = [tag.get("name") for tag in model.get("tags", [])]
+
+        tags = list(set(model_tags + tags))
+        model["tags"] = [{"name": tag} for tag in tags]
 
-    # Filter out filter pipelines
-    models = [
-        model
-        for model in models
-        if "pipeline" not in model or model["pipeline"].get("type", None) != "filter"
-    ]
+        models.append(model)
 
     model_order_list = request.app.state.config.MODEL_ORDER_LIST
     if model_order_list:
@@ -1020,6 +1063,7 @@ async def chat_completion(
             "message_id": form_data.pop("id", None),
             "session_id": form_data.pop("session_id", None),
             "tool_ids": form_data.get("tool_ids", None),
+            "tool_servers": form_data.pop("tool_servers", None),
             "files": form_data.get("files", None),
             "features": form_data.get("features", None),
             "variables": form_data.get("variables", None),
@@ -1046,6 +1090,14 @@ async def chat_completion(
 
     except Exception as e:
         log.debug(f"Error processing chat payload: {e}")
+        Chats.upsert_message_to_chat_by_id_and_message_id(
+            metadata["chat_id"],
+            metadata["message_id"],
+            {
+                "error": {"content": str(e)},
+            },
+        )
+
         raise HTTPException(
             status_code=status.HTTP_400_BAD_REQUEST,
             detail=str(e),
@@ -1181,6 +1233,7 @@ async def get_app_config(request: Request):
                     "enable_autocomplete_generation": app.state.config.ENABLE_AUTOCOMPLETE_GENERATION,
                     "enable_community_sharing": app.state.config.ENABLE_COMMUNITY_SHARING,
                     "enable_message_rating": app.state.config.ENABLE_MESSAGE_RATING,
+                    "enable_user_webhooks": app.state.config.ENABLE_USER_WEBHOOKS,
                     "enable_admin_export": ENABLE_ADMIN_EXPORT,
                     "enable_admin_chat_access": ENABLE_ADMIN_CHAT_ACCESS,
                     "enable_google_drive_integration": app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION,

+ 13 - 6
backend/open_webui/models/folders.py

@@ -9,6 +9,8 @@ from open_webui.models.chats import Chats
 from open_webui.env import SRC_LOG_LEVELS
 from pydantic import BaseModel, ConfigDict
 from sqlalchemy import BigInteger, Column, Text, JSON, Boolean
+from open_webui.utils.access_control import get_permissions
+
 
 log = logging.getLogger(__name__)
 log.setLevel(SRC_LOG_LEVELS["MODELS"])
@@ -234,15 +236,18 @@ class FolderTable:
             log.error(f"update_folder: {e}")
             return
 
-    def delete_folder_by_id_and_user_id(self, id: str, user_id: str) -> bool:
+    def delete_folder_by_id_and_user_id(
+        self, id: str, user_id: str, delete_chats=True
+    ) -> bool:
         try:
             with get_db() as db:
                 folder = db.query(Folder).filter_by(id=id, user_id=user_id).first()
                 if not folder:
                     return False
 
-                # Delete all chats in the folder
-                Chats.delete_chats_by_user_id_and_folder_id(user_id, folder.id)
+                if delete_chats:
+                    # Delete all chats in the folder
+                    Chats.delete_chats_by_user_id_and_folder_id(user_id, folder.id)
 
                 # Delete all children folders
                 def delete_children(folder):
@@ -250,9 +255,11 @@ class FolderTable:
                         folder.id, user_id
                     )
                     for folder_child in folder_children:
-                        Chats.delete_chats_by_user_id_and_folder_id(
-                            user_id, folder_child.id
-                        )
+                        if delete_chats:
+                            Chats.delete_chats_by_user_id_and_folder_id(
+                                user_id, folder_child.id
+                            )
+
                         delete_children(folder_child)
 
                         folder = db.query(Folder).filter_by(id=folder_child.id).first()

+ 54 - 2
backend/open_webui/retrieval/loaders/main.py

@@ -105,7 +105,7 @@ class TikaLoader:
 
         if r.ok:
             raw_metadata = r.json()
-            text = raw_metadata.get("X-TIKA:content", "<No text content found>")
+            text = raw_metadata.get("X-TIKA:content", "<No text content found>").strip()
 
             if "Content-Type" in raw_metadata:
                 headers["Content-Type"] = raw_metadata["Content-Type"]
@@ -117,6 +117,52 @@ class TikaLoader:
             raise Exception(f"Error calling Tika: {r.reason}")
 
 
+class DoclingLoader:
+    def __init__(self, url, file_path=None, mime_type=None):
+        self.url = url.rstrip("/")
+        self.file_path = file_path
+        self.mime_type = mime_type
+
+    def load(self) -> list[Document]:
+        with open(self.file_path, "rb") as f:
+            files = {
+                "files": (
+                    self.file_path,
+                    f,
+                    self.mime_type or "application/octet-stream",
+                )
+            }
+
+            params = {
+                "image_export_mode": "placeholder",
+                "table_mode": "accurate",
+            }
+
+            endpoint = f"{self.url}/v1alpha/convert/file"
+            r = requests.post(endpoint, files=files, data=params)
+
+        if r.ok:
+            result = r.json()
+            document_data = result.get("document", {})
+            text = document_data.get("md_content", "<No text content found>")
+
+            metadata = {"Content-Type": self.mime_type} if self.mime_type else {}
+
+            log.debug("Docling extracted text: %s", text)
+
+            return [Document(page_content=text, metadata=metadata)]
+        else:
+            error_msg = f"Error calling Docling API: {r.reason}"
+            if r.text:
+                try:
+                    error_data = r.json()
+                    if "detail" in error_data:
+                        error_msg += f" - {error_data['detail']}"
+                except Exception:
+                    error_msg += f" - {r.text}"
+            raise Exception(f"Error calling Docling: {error_msg}")
+
+
 class Loader:
     def __init__(self, engine: str = "", **kwargs):
         self.engine = engine
@@ -149,6 +195,12 @@ class Loader:
                     file_path=file_path,
                     mime_type=file_content_type,
                 )
+        elif self.engine == "docling" and self.kwargs.get("DOCLING_SERVER_URL"):
+            loader = DoclingLoader(
+                url=self.kwargs.get("DOCLING_SERVER_URL"),
+                file_path=file_path,
+                mime_type=file_content_type,
+            )
         elif (
             self.engine == "document_intelligence"
             and self.kwargs.get("DOCUMENT_INTELLIGENCE_ENDPOINT") != ""
@@ -176,7 +228,7 @@ class Loader:
                     file_path, extract_images=self.kwargs.get("PDF_EXTRACT_IMAGES")
                 )
             elif file_ext == "csv":
-                loader = CSVLoader(file_path)
+                loader = CSVLoader(file_path, autodetect_encoding=True)
             elif file_ext == "rst":
                 loader = UnstructuredRSTLoader(file_path, mode="elements")
             elif file_ext == "xml":

+ 93 - 0
backend/open_webui/retrieval/loaders/tavily.py

@@ -0,0 +1,93 @@
+import requests
+import logging
+from typing import Iterator, List, Literal, Union
+
+from langchain_core.document_loaders import BaseLoader
+from langchain_core.documents import Document
+from open_webui.env import SRC_LOG_LEVELS
+
+log = logging.getLogger(__name__)
+log.setLevel(SRC_LOG_LEVELS["RAG"])
+
+
+class TavilyLoader(BaseLoader):
+    """Extract web page content from URLs using Tavily Extract API.
+
+    This is a LangChain document loader that uses Tavily's Extract API to
+    retrieve content from web pages and return it as Document objects.
+
+    Args:
+        urls: URL or list of URLs to extract content from.
+        api_key: The Tavily API key.
+        extract_depth: Depth of extraction, either "basic" or "advanced".
+        continue_on_failure: Whether to continue if extraction of a URL fails.
+    """
+
+    def __init__(
+        self,
+        urls: Union[str, List[str]],
+        api_key: str,
+        extract_depth: Literal["basic", "advanced"] = "basic",
+        continue_on_failure: bool = True,
+    ) -> None:
+        """Initialize Tavily Extract client.
+
+        Args:
+            urls: URL or list of URLs to extract content from.
+            api_key: The Tavily API key.
+            extract_depth: Depth of extraction, either "basic" or "advanced".
+                advanced extraction retrieves more data, including tables and
+                embedded content, with higher success but may increase latency.
+                basic costs 1 credit per 5 successful URL extractions,
+                advanced costs 2 credits per 5 successful URL extractions.
+            continue_on_failure: If True, log a failed batch request and continue
+                with the next batch; if False, re-raise the error.
+        """
+        if not urls:
+            raise ValueError("At least one URL must be provided.")
+
+        self.api_key = api_key
+        self.urls = urls if isinstance(urls, list) else [urls]
+        self.extract_depth = extract_depth
+        self.continue_on_failure = continue_on_failure
+        self.api_url = "https://api.tavily.com/extract"
+
+    def lazy_load(self) -> Iterator[Document]:
+        """Extract and yield documents from the URLs using Tavily Extract API."""
+        batch_size = 20
+        for i in range(0, len(self.urls), batch_size):
+            batch_urls = self.urls[i : i + batch_size]
+            try:
+                headers = {
+                    "Content-Type": "application/json",
+                    "Authorization": f"Bearer {self.api_key}",
+                }
+                # Use string for single URL, array for multiple URLs
+                urls_param = batch_urls[0] if len(batch_urls) == 1 else batch_urls
+                payload = {"urls": urls_param, "extract_depth": self.extract_depth}
+                # Make the API call
+                response = requests.post(self.api_url, headers=headers, json=payload)
+                response.raise_for_status()
+                response_data = response.json()
+                # Process successful results
+                for result in response_data.get("results", []):
+                    url = result.get("url", "")
+                    content = result.get("raw_content", "")
+                    if not content:
+                        log.warning(f"No content extracted from {url}")
+                        continue
+                    # Add URLs as metadata
+                    metadata = {"source": url}
+                    yield Document(
+                        page_content=content,
+                        metadata=metadata,
+                    )
+                for failed in response_data.get("failed_results", []):
+                    url = failed.get("url", "")
+                    error = failed.get("error", "Unknown error")
+                    log.error(f"Failed to extract content from {url}: {error}")
+            except Exception as e:
+                if self.continue_on_failure:
+                    log.error(f"Error extracting content from batch {batch_urls}: {e}")
+                else:
+                    raise e

+ 155 - 65
backend/open_webui/retrieval/utils.py

@@ -1,30 +1,35 @@
 import logging
 import os
-import uuid
 from typing import Optional, Union
 
-import asyncio
 import requests
 import hashlib
+from concurrent.futures import ThreadPoolExecutor
 
 from huggingface_hub import snapshot_download
 from langchain.retrievers import ContextualCompressionRetriever, EnsembleRetriever
 from langchain_community.retrievers import BM25Retriever
 from langchain_core.documents import Document
 
-
 from open_webui.config import VECTOR_DB
 from open_webui.retrieval.vector.connector import VECTOR_DB_CLIENT
-from open_webui.utils.misc import get_last_user_message, calculate_sha256_string
 
 from open_webui.models.users import UserModel
 from open_webui.models.files import Files
 
+from open_webui.retrieval.vector.main import GetResult
+
+
 from open_webui.env import (
     SRC_LOG_LEVELS,
     OFFLINE_MODE,
     ENABLE_FORWARD_USER_INFO_HEADERS,
 )
+from open_webui.config import (
+    RAG_EMBEDDING_QUERY_PREFIX,
+    RAG_EMBEDDING_CONTENT_PREFIX,
+    RAG_EMBEDDING_PREFIX_FIELD_NAME,
+)
 
 log = logging.getLogger(__name__)
 log.setLevel(SRC_LOG_LEVELS["RAG"])
@@ -49,7 +54,7 @@ class VectorSearchRetriever(BaseRetriever):
     ) -> list[Document]:
         result = VECTOR_DB_CLIENT.search(
             collection_name=self.collection_name,
-            vectors=[self.embedding_function(query)],
+            vectors=[self.embedding_function(query, RAG_EMBEDDING_QUERY_PREFIX)],
             limit=self.top_k,
         )
 
@@ -102,18 +107,18 @@ def get_doc(collection_name: str, user: UserModel = None):
 
 def query_doc_with_hybrid_search(
     collection_name: str,
+    collection_result: GetResult,
     query: str,
     embedding_function,
     k: int,
     reranking_function,
+    k_reranker: int,
     r: float,
 ) -> dict:
     try:
-        result = VECTOR_DB_CLIENT.get(collection_name=collection_name)
-
         bm25_retriever = BM25Retriever.from_texts(
-            texts=result.documents[0],
-            metadatas=result.metadatas[0],
+            texts=collection_result.documents[0],
+            metadatas=collection_result.metadatas[0],
         )
         bm25_retriever.k = k
 
@@ -128,7 +133,7 @@ def query_doc_with_hybrid_search(
         )
         compressor = RerankCompressor(
             embedding_function=embedding_function,
-            top_n=k,
+            top_n=k_reranker,
             reranking_function=reranking_function,
             r_score=r,
         )
@@ -138,10 +143,23 @@ def query_doc_with_hybrid_search(
         )
 
         result = compression_retriever.invoke(query)
+
+        distances = [d.metadata.get("score") for d in result]
+        documents = [d.page_content for d in result]
+        metadatas = [d.metadata for d in result]
+
+        # keep only the top min(k, k_reranker) items; zip and unpack use the same field order
+        if k < k_reranker:
+            sorted_items = sorted(
+                zip(distances, documents, metadatas), key=lambda x: x[0], reverse=True
+            )
+            sorted_items = sorted_items[:k]
+            distances, documents, metadatas = map(list, zip(*sorted_items))
+
         result = {
-            "distances": [[d.metadata.get("score") for d in result]],
-            "documents": [[d.page_content for d in result]],
-            "metadatas": [[d.metadata for d in result]],
+            "distances": [distances],
+            "documents": [documents],
+            "metadatas": [metadatas],
         }
 
         log.info(
@@ -174,12 +192,9 @@ def merge_get_results(get_results: list[dict]) -> dict:
     return result
 
 
-def merge_and_sort_query_results(
-    query_results: list[dict], k: int, reverse: bool = False
-) -> dict:
+def merge_and_sort_query_results(query_results: list[dict], k: int) -> dict:
     # Initialize lists to store combined data
-    combined = []
-    seen_hashes = set()  # To store unique document hashes
+    combined = dict()  # To store documents with unique document hashes
 
     for data in query_results:
         distances = data["distances"][0]
@@ -192,12 +207,17 @@ def merge_and_sort_query_results(
                     document.encode()
                 ).hexdigest()  # Compute a hash for uniqueness
 
-                if doc_hash not in seen_hashes:
-                    seen_hashes.add(doc_hash)
-                    combined.append((distance, document, metadata))
+                if doc_hash not in combined.keys():
+                    combined[doc_hash] = (distance, document, metadata)
+                    continue  # if doc is new, no further comparison is needed
+
+                # if doc is already in, but new distance is better, update
+                if distance > combined[doc_hash][0]:
+                    combined[doc_hash] = (distance, document, metadata)
 
+    combined = list(combined.values())
     # Sort the list based on distances
-    combined.sort(key=lambda x: x[0], reverse=reverse)
+    combined.sort(key=lambda x: x[0], reverse=True)
 
     # Slice to keep only the top k elements
     sorted_distances, sorted_documents, sorted_metadatas = (
@@ -237,7 +257,7 @@ def query_collection(
 ) -> dict:
     results = []
     for query in queries:
-        query_embedding = embedding_function(query)
+        query_embedding = embedding_function(query, prefix=RAG_EMBEDDING_QUERY_PREFIX)
         for collection_name in collection_names:
             if collection_name:
                 try:
@@ -253,12 +273,7 @@ def query_collection(
             else:
                 pass
 
-    if VECTOR_DB == "chroma":
-        # Chroma uses unconventional cosine similarity, so we don't need to reverse the results
-        # https://docs.trychroma.com/docs/collections/configure#configuring-chroma-collections
-        return merge_and_sort_query_results(results, k=k, reverse=False)
-    else:
-        return merge_and_sort_query_results(results, k=k, reverse=True)
+    return merge_and_sort_query_results(results, k=k)
 
 
 def query_collection_with_hybrid_search(
@@ -267,39 +282,66 @@ def query_collection_with_hybrid_search(
     embedding_function,
     k: int,
     reranking_function,
+    k_reranker: int,
     r: float,
 ) -> dict:
     results = []
     error = False
+    # Fetch collection data once per collection sequentially
+    # Avoid fetching the same data multiple times later
+    collection_results = {}
     for collection_name in collection_names:
         try:
-            for query in queries:
-                result = query_doc_with_hybrid_search(
-                    collection_name=collection_name,
-                    query=query,
-                    embedding_function=embedding_function,
-                    k=k,
-                    reranking_function=reranking_function,
-                    r=r,
-                )
-                results.append(result)
+            collection_results[collection_name] = VECTOR_DB_CLIENT.get(
+                collection_name=collection_name
+            )
         except Exception as e:
-            log.exception(
-                "Error when querying the collection with " f"hybrid_search: {e}"
+            log.exception(f"Failed to fetch collection {collection_name}: {e}")
+            collection_results[collection_name] = None
+
+    log.info(
+        f"Starting hybrid search for {len(queries)} queries in {len(collection_names)} collections..."
+    )
+
+    def process_query(collection_name, query):
+        try:
+            result = query_doc_with_hybrid_search(
+                collection_name=collection_name,
+                collection_result=collection_results[collection_name],
+                query=query,
+                embedding_function=embedding_function,
+                k=k,
+                reranking_function=reranking_function,
+                k_reranker=k_reranker,
+                r=r,
             )
+            return result, None
+        except Exception as e:
+            log.exception(f"Error when querying the collection with hybrid_search: {e}")
+            return None, e
+
+    tasks = [
+        (collection_name, query)
+        for collection_name in collection_names
+        for query in queries
+    ]
+
+    with ThreadPoolExecutor() as executor:
+        future_results = [executor.submit(process_query, cn, q) for cn, q in tasks]
+        task_results = [future.result() for future in future_results]
+
+    for result, err in task_results:
+        if err is not None:
             error = True
+        elif result is not None:
+            results.append(result)
 
-    if error:
+    if error and not results:
         raise Exception(
-            "Hybrid search failed for all collections. Using Non hybrid search as fallback."
+            "Hybrid search failed for all collections. Using Non-hybrid search as fallback."
         )
 
-    if VECTOR_DB == "chroma":
-        # Chroma uses unconventional cosine similarity, so we don't need to reverse the results
-        # https://docs.trychroma.com/docs/collections/configure#configuring-chroma-collections
-        return merge_and_sort_query_results(results, k=k, reverse=False)
-    else:
-        return merge_and_sort_query_results(results, k=k, reverse=True)
+    return merge_and_sort_query_results(results, k=k)
 
 
 def get_embedding_function(
@@ -311,29 +353,38 @@ def get_embedding_function(
     embedding_batch_size,
 ):
     if embedding_engine == "":
-        return lambda query, user=None: embedding_function.encode(query).tolist()
+        return lambda query, prefix=None, user=None: embedding_function.encode(
+            query, prompt=prefix if prefix else None
+        ).tolist()
     elif embedding_engine in ["ollama", "openai"]:
-        func = lambda query, user=None: generate_embeddings(
+        func = lambda query, prefix=None, user=None: generate_embeddings(
             engine=embedding_engine,
             model=embedding_model,
             text=query,
+            prefix=prefix,
             url=url,
             key=key,
             user=user,
         )
 
-        def generate_multiple(query, user, func):
+        def generate_multiple(query, prefix, user, func):
             if isinstance(query, list):
                 embeddings = []
                 for i in range(0, len(query), embedding_batch_size):
                     embeddings.extend(
-                        func(query[i : i + embedding_batch_size], user=user)
+                        func(
+                            query[i : i + embedding_batch_size],
+                            prefix=prefix,
+                            user=user,
+                        )
                     )
                 return embeddings
             else:
-                return func(query, user)
+                return func(query, prefix, user)
 
-        return lambda query, user=None: generate_multiple(query, user, func)
+        return lambda query, prefix=None, user=None: generate_multiple(
+            query, prefix, user, func
+        )
     else:
         raise ValueError(f"Unknown embedding engine: {embedding_engine}")
 
@@ -345,6 +396,7 @@ def get_sources_from_files(
     embedding_function,
     k,
     reranking_function,
+    k_reranker,
     r,
     hybrid_search,
     full_context=False,
@@ -461,6 +513,7 @@ def get_sources_from_files(
                                     embedding_function=embedding_function,
                                     k=k,
                                     reranking_function=reranking_function,
+                                    k_reranker=k_reranker,
                                     r=r,
                                 )
                             except Exception as e:
@@ -553,9 +606,14 @@ def generate_openai_batch_embeddings(
     texts: list[str],
     url: str = "https://api.openai.com/v1",
     key: str = "",
+    prefix: str = None,
     user: UserModel = None,
 ) -> Optional[list[list[float]]]:
     try:
+        json_data = {"input": texts, "model": model}
+        if isinstance(RAG_EMBEDDING_PREFIX_FIELD_NAME, str) and isinstance(prefix, str):
+            json_data[RAG_EMBEDDING_PREFIX_FIELD_NAME] = prefix
+
         r = requests.post(
             f"{url}/embeddings",
             headers={
@@ -572,7 +630,7 @@ def generate_openai_batch_embeddings(
                     else {}
                 ),
             },
-            json={"input": texts, "model": model},
+            json=json_data,
         )
         r.raise_for_status()
         data = r.json()
@@ -586,9 +644,18 @@ def generate_openai_batch_embeddings(
 
 
 def generate_ollama_batch_embeddings(
-    model: str, texts: list[str], url: str, key: str = "", user: UserModel = None
+    model: str,
+    texts: list[str],
+    url: str,
+    key: str = "",
+    prefix: str = None,
+    user: UserModel = None,
 ) -> Optional[list[list[float]]]:
     try:
+        json_data = {"input": texts, "model": model}
+        if isinstance(RAG_EMBEDDING_PREFIX_FIELD_NAME, str) and isinstance(prefix, str):
+            json_data[RAG_EMBEDDING_PREFIX_FIELD_NAME] = prefix
+
         r = requests.post(
             f"{url}/api/embed",
             headers={
@@ -605,7 +672,7 @@ def generate_ollama_batch_embeddings(
                     else {}
                 ),
             },
-            json={"input": texts, "model": model},
+            json=json_data,
         )
         r.raise_for_status()
         data = r.json()
@@ -619,15 +686,34 @@ def generate_ollama_batch_embeddings(
         return None
 
 
-def generate_embeddings(engine: str, model: str, text: Union[str, list[str]], **kwargs):
+def generate_embeddings(
+    engine: str,
+    model: str,
+    text: Union[str, list[str]],
+    prefix: Union[str, None] = None,
+    **kwargs,
+):
     url = kwargs.get("url", "")
     key = kwargs.get("key", "")
     user = kwargs.get("user")
 
+    if prefix is not None and RAG_EMBEDDING_PREFIX_FIELD_NAME is None:
+        if isinstance(text, list):
+            text = [f"{prefix}{text_element}" for text_element in text]
+        else:
+            text = f"{prefix}{text}"
+
     if engine == "ollama":
         if isinstance(text, list):
             embeddings = generate_ollama_batch_embeddings(
-                **{"model": model, "texts": text, "url": url, "key": key, "user": user}
+                **{
+                    "model": model,
+                    "texts": text,
+                    "url": url,
+                    "key": key,
+                    "prefix": prefix,
+                    "user": user,
+                }
             )
         else:
             embeddings = generate_ollama_batch_embeddings(
@@ -636,16 +722,20 @@ def generate_embeddings(engine: str, model: str, text: Union[str, list[str]], **
                     "texts": [text],
                     "url": url,
                     "key": key,
+                    "prefix": prefix,
                     "user": user,
                 }
             )
         return embeddings[0] if isinstance(text, str) else embeddings
     elif engine == "openai":
         if isinstance(text, list):
-            embeddings = generate_openai_batch_embeddings(model, text, url, key, user)
+            embeddings = generate_openai_batch_embeddings(
+                model, text, url, key, prefix, user
+            )
         else:
-            embeddings = generate_openai_batch_embeddings(model, [text], url, key, user)
-
+            embeddings = generate_openai_batch_embeddings(
+                model, [text], url, key, prefix, user
+            )
         return embeddings[0] if isinstance(text, str) else embeddings
 
 
@@ -681,9 +771,9 @@ class RerankCompressor(BaseDocumentCompressor):
         else:
             from sentence_transformers import util
 
-            query_embedding = self.embedding_function(query)
+            query_embedding = self.embedding_function(query, RAG_EMBEDDING_QUERY_PREFIX)
             document_embedding = self.embedding_function(
-                [doc.page_content for doc in documents]
+                [doc.page_content for doc in documents], RAG_EMBEDDING_CONTENT_PREFIX
             )
             scores = util.cos_sim(query_embedding, document_embedding)[0]
 

+ 20 - 7
backend/open_webui/retrieval/vector/dbs/chroma.py

@@ -75,10 +75,16 @@ class ChromaClient:
                     n_results=limit,
                 )
 
+                # chromadb has cosine distance, 2 (worst) -> 0 (best). Re-ordering to 0 (worst) -> 1 (best)
+                # https://docs.trychroma.com/docs/collections/configure cosine equation
+                distances: list = result["distances"][0]
+                distances = [2 - dist for dist in distances]
+                distances = [[dist / 2 for dist in distances]]
+
                 return SearchResult(
                     **{
                         "ids": result["ids"],
-                        "distances": result["distances"],
+                        "distances": distances,
                         "documents": result["documents"],
                         "metadatas": result["metadatas"],
                     }
@@ -166,12 +172,19 @@ class ChromaClient:
         filter: Optional[dict] = None,
     ):
         # Delete the items from the collection based on the ids.
-        collection = self.client.get_collection(name=collection_name)
-        if collection:
-            if ids:
-                collection.delete(ids=ids)
-            elif filter:
-                collection.delete(where=filter)
+        try:
+            collection = self.client.get_collection(name=collection_name)
+            if collection:
+                if ids:
+                    collection.delete(ids=ids)
+                elif filter:
+                    collection.delete(where=filter)
+        except Exception as e:
+            # If collection doesn't exist, that's fine - nothing to delete
+            log.debug(
+                f"Attempted to delete from non-existent collection {collection_name}. Ignoring."
+            )
+            pass
 
     def reset(self):
         # Resets the database. This will delete all collections and item entries.

+ 4 - 1
backend/open_webui/retrieval/vector/dbs/milvus.py

@@ -64,7 +64,10 @@ class MilvusClient:
 
             for item in match:
                 _ids.append(item.get("id"))
-                _distances.append(item.get("distance"))
+                # normalize milvus score from [-1, 1] to [0, 1] range
+                # https://milvus.io/docs/de/metric.md
+                _dist = (item.get("distance") + 1.0) / 2.0
+                _distances.append(_dist)
                 _documents.append(item.get("entity", {}).get("data", {}).get("text"))
                 _metadatas.append(item.get("entity", {}).get("metadata"))
 

+ 110 - 65
backend/open_webui/retrieval/vector/dbs/opensearch.py

@@ -1,4 +1,5 @@
 from opensearchpy import OpenSearch
+from opensearchpy.helpers import bulk
 from typing import Optional
 
 from open_webui.retrieval.vector.main import VectorItem, SearchResult, GetResult
@@ -21,7 +22,13 @@ class OpenSearchClient:
             http_auth=(OPENSEARCH_USERNAME, OPENSEARCH_PASSWORD),
         )
 
+    def _get_index_name(self, collection_name: str) -> str:
+        return f"{self.index_prefix}_{collection_name}"
+
     def _result_to_get_result(self, result) -> GetResult:
+        if not result["hits"]["hits"]:
+            return None
+
         ids = []
         documents = []
         metadatas = []
@@ -31,9 +38,12 @@ class OpenSearchClient:
             documents.append(hit["_source"].get("text"))
             metadatas.append(hit["_source"].get("metadata"))
 
-        return GetResult(ids=ids, documents=documents, metadatas=metadatas)
+        return GetResult(ids=[ids], documents=[documents], metadatas=[metadatas])
 
     def _result_to_search_result(self, result) -> SearchResult:
+        if not result["hits"]["hits"]:
+            return None
+
         ids = []
         distances = []
         documents = []
@@ -46,34 +56,40 @@ class OpenSearchClient:
             metadatas.append(hit["_source"].get("metadata"))
 
         return SearchResult(
-            ids=ids, distances=distances, documents=documents, metadatas=metadatas
+            ids=[ids],
+            distances=[distances],
+            documents=[documents],
+            metadatas=[metadatas],
         )
 
     def _create_index(self, collection_name: str, dimension: int):
         body = {
+            "settings": {"index": {"knn": True}},
             "mappings": {
                 "properties": {
                     "id": {"type": "keyword"},
                     "vector": {
-                        "type": "dense_vector",
-                        "dims": dimension,  # Adjust based on your vector dimensions
-                        "index": true,
+                        "type": "knn_vector",
+                        "dimension": dimension,  # Adjust based on your vector dimensions
+                        "index": True,
                         "similarity": "faiss",
                         "method": {
                             "name": "hnsw",
-                            "space_type": "ip",  # Use inner product to approximate cosine similarity
+                            "space_type": "innerproduct",  # Use inner product to approximate cosine similarity
                             "engine": "faiss",
-                            "ef_construction": 128,
-                            "m": 16,
+                            "parameters": {
+                                "ef_construction": 128,
+                                "m": 16,
+                            },
                         },
                     },
                     "text": {"type": "text"},
                     "metadata": {"type": "object"},
                 }
-            }
+            },
         }
         self.client.indices.create(
-            index=f"{self.index_prefix}_{collection_name}", body=body
+            index=self._get_index_name(collection_name), body=body
         )
 
     def _create_batches(self, items: list[VectorItem], batch_size=100):
@@ -83,39 +99,45 @@ class OpenSearchClient:
     def has_collection(self, collection_name: str) -> bool:
         # has_collection here means has index.
         # We are simply adapting to the norms of the other DBs.
-        return self.client.indices.exists(
-            index=f"{self.index_prefix}_{collection_name}"
-        )
+        return self.client.indices.exists(index=self._get_index_name(collection_name))
 
-    def delete_colleciton(self, collection_name: str):
+    def delete_collection(self, collection_name: str):
         # delete_collection here means delete index.
         # We are simply adapting to the norms of the other DBs.
-        self.client.indices.delete(index=f"{self.index_prefix}_{collection_name}")
+        self.client.indices.delete(index=self._get_index_name(collection_name))
 
     def search(
-        self, collection_name: str, vectors: list[list[float]], limit: int
+        self, collection_name: str, vectors: list[list[float | int]], limit: int
     ) -> Optional[SearchResult]:
-        query = {
-            "size": limit,
-            "_source": ["text", "metadata"],
-            "query": {
-                "script_score": {
-                    "query": {"match_all": {}},
-                    "script": {
-                        "source": "cosineSimilarity(params.vector, 'vector') + 1.0",
-                        "params": {
-                            "vector": vectors[0]
-                        },  # Assuming single query vector
-                    },
-                }
-            },
-        }
+        try:
+            if not self.has_collection(collection_name):
+                return None
+
+            query = {
+                "size": limit,
+                "_source": ["text", "metadata"],
+                "query": {
+                    "script_score": {
+                        "query": {"match_all": {}},
+                        "script": {
+                            "source": "(cosineSimilarity(params.query_value, doc[params.field]) + 1.0) / 2.0",
+                            "params": {
+                                "field": "vector",
+                                "query_value": vectors[0],
+                            },  # Assuming single query vector
+                        },
+                    }
+                },
+            }
 
-        result = self.client.search(
-            index=f"{self.index_prefix}_{collection_name}", body=query
-        )
+            result = self.client.search(
+                index=self._get_index_name(collection_name), body=query
+            )
+
+            return self._result_to_search_result(result)
 
-        return self._result_to_search_result(result)
+        except Exception as e:
+            return None
 
     def query(
         self, collection_name: str, filter: dict, limit: Optional[int] = None
@@ -129,13 +151,15 @@ class OpenSearchClient:
         }
 
         for field, value in filter.items():
-            query_body["query"]["bool"]["filter"].append({"term": {field: value}})
+            query_body["query"]["bool"]["filter"].append(
+                {"match": {"metadata." + str(field): value}}
+            )
 
         size = limit if limit else 10
 
         try:
             result = self.client.search(
-                index=f"{self.index_prefix}_{collection_name}",
+                index=self._get_index_name(collection_name),
                 body=query_body,
                 size=size,
             )
@@ -146,14 +170,14 @@ class OpenSearchClient:
             return None
 
     def _create_index_if_not_exists(self, collection_name: str, dimension: int):
-        if not self.has_index(collection_name):
+        if not self.has_collection(collection_name):
             self._create_index(collection_name, dimension)
 
     def get(self, collection_name: str) -> Optional[GetResult]:
         query = {"query": {"match_all": {}}, "_source": ["text", "metadata"]}
 
         result = self.client.search(
-            index=f"{self.index_prefix}_{collection_name}", body=query
+            index=self._get_index_name(collection_name), body=query
         )
         return self._result_to_get_result(result)
 
@@ -165,18 +189,18 @@ class OpenSearchClient:
         for batch in self._create_batches(items):
             actions = [
                 {
-                    "index": {
-                        "_id": item["id"],
-                        "_source": {
-                            "vector": item["vector"],
-                            "text": item["text"],
-                            "metadata": item["metadata"],
-                        },
-                    }
+                    "_op_type": "index",
+                    "_index": self._get_index_name(collection_name),
+                    "_id": item["id"],
+                    "_source": {
+                        "vector": item["vector"],
+                        "text": item["text"],
+                        "metadata": item["metadata"],
+                    },
                 }
                 for item in batch
             ]
-            self.client.bulk(actions)
+            bulk(self.client, actions)
 
     def upsert(self, collection_name: str, items: list[VectorItem]):
         self._create_index_if_not_exists(
@@ -186,26 +210,47 @@ class OpenSearchClient:
         for batch in self._create_batches(items):
             actions = [
                 {
-                    "index": {
-                        "_id": item["id"],
-                        "_index": f"{self.index_prefix}_{collection_name}",
-                        "_source": {
-                            "vector": item["vector"],
-                            "text": item["text"],
-                            "metadata": item["metadata"],
-                        },
-                    }
+                    "_op_type": "update",
+                    "_index": self._get_index_name(collection_name),
+                    "_id": item["id"],
+                    "doc": {
+                        "vector": item["vector"],
+                        "text": item["text"],
+                        "metadata": item["metadata"],
+                    },
+                    "doc_as_upsert": True,
                 }
                 for item in batch
             ]
-            self.client.bulk(actions)
-
-    def delete(self, collection_name: str, ids: list[str]):
-        actions = [
-            {"delete": {"_index": f"{self.index_prefix}_{collection_name}", "_id": id}}
-            for id in ids
-        ]
-        self.client.bulk(body=actions)
+            bulk(self.client, actions)
+
+    def delete(
+        self,
+        collection_name: str,
+        ids: Optional[list[str]] = None,
+        filter: Optional[dict] = None,
+    ):
+        if ids:
+            actions = [
+                {
+                    "_op_type": "delete",
+                    "_index": self._get_index_name(collection_name),
+                    "_id": id,
+                }
+                for id in ids
+            ]
+            bulk(self.client, actions)
+        elif filter:
+            query_body = {
+                "query": {"bool": {"filter": []}},
+            }
+            for field, value in filter.items():
+                query_body["query"]["bool"]["filter"].append(
+                    {"match": {"metadata." + str(field): value}}
+                )
+            self.client.delete_by_query(
+                index=self._get_index_name(collection_name), body=query_body
+            )
 
     def reset(self):
         indices = self.client.indices.get(index=f"{self.index_prefix}_*")

+ 3 - 1
backend/open_webui/retrieval/vector/dbs/pgvector.py

@@ -278,7 +278,9 @@ class PgvectorClient:
             for row in results:
                 qid = int(row.qid)
                 ids[qid].append(row.id)
-                distances[qid].append(row.distance)
+                # normalize and re-order pgvector distance from [2, 0] to [0, 1] score range
+                # https://github.com/pgvector/pgvector?tab=readme-ov-file#querying
+                distances[qid].append((2.0 - row.distance) / 2.0)
                 documents[qid].append(row.text)
                 metadatas[qid].append(row.vmetadata)
 

+ 2 - 1
backend/open_webui/retrieval/vector/dbs/qdrant.py

@@ -99,7 +99,8 @@ class QdrantClient:
             ids=get_result.ids,
             documents=get_result.documents,
             metadatas=get_result.metadatas,
-            distances=[[point.score for point in query_response.points]],
+            # qdrant distance is [-1, 1], normalize to [0, 1]
+            distances=[[(point.score + 1.0) / 2.0 for point in query_response.points]],
         )
 
     def query(self, collection_name: str, filter: dict, limit: Optional[int] = None):

+ 172 - 76
backend/open_webui/retrieval/web/utils.py

@@ -24,13 +24,17 @@ from langchain_community.document_loaders import PlaywrightURLLoader, WebBaseLoa
 from langchain_community.document_loaders.firecrawl import FireCrawlLoader
 from langchain_community.document_loaders.base import BaseLoader
 from langchain_core.documents import Document
+from open_webui.retrieval.loaders.tavily import TavilyLoader
 from open_webui.constants import ERROR_MESSAGES
 from open_webui.config import (
     ENABLE_RAG_LOCAL_WEB_FETCH,
     PLAYWRIGHT_WS_URI,
+    PLAYWRIGHT_TIMEOUT,
     RAG_WEB_LOADER_ENGINE,
     FIRECRAWL_API_BASE_URL,
     FIRECRAWL_API_KEY,
+    TAVILY_API_KEY,
+    TAVILY_EXTRACT_DEPTH,
 )
 from open_webui.env import SRC_LOG_LEVELS
 
@@ -113,7 +117,47 @@ def verify_ssl_cert(url: str) -> bool:
         return False
 
 
-class SafeFireCrawlLoader(BaseLoader):
+class RateLimitMixin:
+    async def _wait_for_rate_limit(self):
+        """Wait to respect the rate limit if specified."""
+        if self.requests_per_second and self.last_request_time:
+            min_interval = timedelta(seconds=1.0 / self.requests_per_second)
+            time_since_last = datetime.now() - self.last_request_time
+            if time_since_last < min_interval:
+                await asyncio.sleep((min_interval - time_since_last).total_seconds())
+        self.last_request_time = datetime.now()
+
+    def _sync_wait_for_rate_limit(self):
+        """Synchronous version of rate limit wait."""
+        if self.requests_per_second and self.last_request_time:
+            min_interval = timedelta(seconds=1.0 / self.requests_per_second)
+            time_since_last = datetime.now() - self.last_request_time
+            if time_since_last < min_interval:
+                time.sleep((min_interval - time_since_last).total_seconds())
+        self.last_request_time = datetime.now()
+
+
+class URLProcessingMixin:
+    def _verify_ssl_cert(self, url: str) -> bool:
+        """Verify SSL certificate for a URL."""
+        return verify_ssl_cert(url)
+
+    async def _safe_process_url(self, url: str) -> bool:
+        """Perform safety checks before processing a URL."""
+        if self.verify_ssl and not self._verify_ssl_cert(url):
+            raise ValueError(f"SSL certificate verification failed for {url}")
+        await self._wait_for_rate_limit()
+        return True
+
+    def _safe_process_url_sync(self, url: str) -> bool:
+        """Synchronous version of safety checks."""
+        if self.verify_ssl and not self._verify_ssl_cert(url):
+            raise ValueError(f"SSL certificate verification failed for {url}")
+        self._sync_wait_for_rate_limit()
+        return True
+
+
+class SafeFireCrawlLoader(BaseLoader, RateLimitMixin, URLProcessingMixin):
     def __init__(
         self,
         web_paths,
@@ -184,7 +228,7 @@ class SafeFireCrawlLoader(BaseLoader):
                 yield from loader.lazy_load()
             except Exception as e:
                 if self.continue_on_failure:
-                    log.exception(e, "Error loading %s", url)
+                    log.exception(f"Error loading {url}: {e}")
                     continue
                 raise e
 
@@ -204,47 +248,124 @@ class SafeFireCrawlLoader(BaseLoader):
                     yield document
             except Exception as e:
                 if self.continue_on_failure:
-                    log.exception(e, "Error loading %s", url)
+                    log.exception(f"Error loading {url}: {e}")
                     continue
                 raise e
 
-    def _verify_ssl_cert(self, url: str) -> bool:
-        return verify_ssl_cert(url)
 
-    async def _wait_for_rate_limit(self):
-        """Wait to respect the rate limit if specified."""
-        if self.requests_per_second and self.last_request_time:
-            min_interval = timedelta(seconds=1.0 / self.requests_per_second)
-            time_since_last = datetime.now() - self.last_request_time
-            if time_since_last < min_interval:
-                await asyncio.sleep((min_interval - time_since_last).total_seconds())
-        self.last_request_time = datetime.now()
+class SafeTavilyLoader(BaseLoader, RateLimitMixin, URLProcessingMixin):
+    def __init__(
+        self,
+        web_paths: Union[str, List[str]],
+        api_key: str,
+        extract_depth: Literal["basic", "advanced"] = "basic",
+        continue_on_failure: bool = True,
+        requests_per_second: Optional[float] = None,
+        verify_ssl: bool = True,
+        trust_env: bool = False,
+        proxy: Optional[Dict[str, str]] = None,
+    ):
+        """Initialize SafeTavilyLoader with rate limiting and SSL verification support.
 
-    def _sync_wait_for_rate_limit(self):
-        """Synchronous version of rate limit wait."""
-        if self.requests_per_second and self.last_request_time:
-            min_interval = timedelta(seconds=1.0 / self.requests_per_second)
-            time_since_last = datetime.now() - self.last_request_time
-            if time_since_last < min_interval:
-                time.sleep((min_interval - time_since_last).total_seconds())
-        self.last_request_time = datetime.now()
+        Args:
+            web_paths: List of URLs/paths to process.
+            api_key: The Tavily API key.
+            extract_depth: Depth of extraction ("basic" or "advanced").
+            continue_on_failure: Whether to continue if extraction of a URL fails.
+            requests_per_second: Number of requests per second to limit to.
+            verify_ssl: If True, verify SSL certificates.
+            trust_env: If True, use proxy settings from environment variables.
+            proxy: Optional proxy configuration.
+        """
+        # Initialize proxy configuration if using environment variables
+        proxy_server = proxy.get("server") if proxy else None
+        if trust_env and not proxy_server:
+            env_proxies = urllib.request.getproxies()
+            env_proxy_server = env_proxies.get("https") or env_proxies.get("http")
+            if env_proxy_server:
+                if proxy:
+                    proxy["server"] = env_proxy_server
+                else:
+                    proxy = {"server": env_proxy_server}
 
-    async def _safe_process_url(self, url: str) -> bool:
-        """Perform safety checks before processing a URL."""
-        if self.verify_ssl and not self._verify_ssl_cert(url):
-            raise ValueError(f"SSL certificate verification failed for {url}")
-        await self._wait_for_rate_limit()
-        return True
+        # Store parameters for creating TavilyLoader instances
+        self.web_paths = web_paths if isinstance(web_paths, list) else [web_paths]
+        self.api_key = api_key
+        self.extract_depth = extract_depth
+        self.continue_on_failure = continue_on_failure
+        self.verify_ssl = verify_ssl
+        self.trust_env = trust_env
+        self.proxy = proxy
 
-    def _safe_process_url_sync(self, url: str) -> bool:
-        """Synchronous version of safety checks."""
-        if self.verify_ssl and not self._verify_ssl_cert(url):
-            raise ValueError(f"SSL certificate verification failed for {url}")
-        self._sync_wait_for_rate_limit()
-        return True
+        # Add rate limiting
+        self.requests_per_second = requests_per_second
+        self.last_request_time = None
 
+    def lazy_load(self) -> Iterator[Document]:
+        """Load documents with rate limiting support, delegating to TavilyLoader."""
+        valid_urls = []
+        for url in self.web_paths:
+            try:
+                self._safe_process_url_sync(url)
+                valid_urls.append(url)
+            except Exception as e:
+                log.warning(f"SSL verification failed for {url}: {str(e)}")
+                if not self.continue_on_failure:
+                    raise e
+        if not valid_urls:
+            if self.continue_on_failure:
+                log.warning("No valid URLs to process after SSL verification")
+                return
+            raise ValueError("No valid URLs to process after SSL verification")
+        try:
+            loader = TavilyLoader(
+                urls=valid_urls,
+                api_key=self.api_key,
+                extract_depth=self.extract_depth,
+                continue_on_failure=self.continue_on_failure,
+            )
+            yield from loader.lazy_load()
+        except Exception as e:
+            if self.continue_on_failure:
+                log.exception(f"Error extracting content from URLs: {e}")
+            else:
+                raise e
 
-class SafePlaywrightURLLoader(PlaywrightURLLoader):
+    async def alazy_load(self) -> AsyncIterator[Document]:
+        """Async version with rate limiting and SSL verification."""
+        valid_urls = []
+        for url in self.web_paths:
+            try:
+                await self._safe_process_url(url)
+                valid_urls.append(url)
+            except Exception as e:
+                log.warning(f"SSL verification failed for {url}: {str(e)}")
+                if not self.continue_on_failure:
+                    raise e
+
+        if not valid_urls:
+            if self.continue_on_failure:
+                log.warning("No valid URLs to process after SSL verification")
+                return
+            raise ValueError("No valid URLs to process after SSL verification")
+
+        try:
+            loader = TavilyLoader(
+                urls=valid_urls,
+                api_key=self.api_key,
+                extract_depth=self.extract_depth,
+                continue_on_failure=self.continue_on_failure,
+            )
+            async for document in loader.alazy_load():
+                yield document
+        except Exception as e:
+            if self.continue_on_failure:
+                log.exception(f"Error loading URLs: {e}")
+            else:
+                raise e
+
+
+class SafePlaywrightURLLoader(PlaywrightURLLoader, RateLimitMixin, URLProcessingMixin):
     """Load HTML pages safely with Playwright, supporting SSL verification, rate limiting, and remote browser connection.
 
     Attributes:
@@ -256,6 +377,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader):
         headless (bool): If True, the browser will run in headless mode.
         proxy (dict): Proxy override settings for the Playwright session.
         playwright_ws_url (Optional[str]): WebSocket endpoint URI for remote browser connection.
+        playwright_timeout (Optional[int]): Maximum operation time in milliseconds.
     """
 
     def __init__(
@@ -269,6 +391,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader):
         remove_selectors: Optional[List[str]] = None,
         proxy: Optional[Dict[str, str]] = None,
         playwright_ws_url: Optional[str] = None,
+        playwright_timeout: Optional[int] = 10000,
     ):
         """Initialize with additional safety parameters and remote browser support."""
 
@@ -295,6 +418,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader):
         self.last_request_time = None
         self.playwright_ws_url = playwright_ws_url
         self.trust_env = trust_env
+        self.playwright_timeout = playwright_timeout
 
     def lazy_load(self) -> Iterator[Document]:
         """Safely load URLs synchronously with support for remote browser."""
@@ -311,7 +435,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader):
                 try:
                     self._safe_process_url_sync(url)
                     page = browser.new_page()
-                    response = page.goto(url)
+                    response = page.goto(url, timeout=self.playwright_timeout)
                     if response is None:
                         raise ValueError(f"page.goto() returned None for url {url}")
 
@@ -320,7 +444,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader):
                     yield Document(page_content=text, metadata=metadata)
                 except Exception as e:
                     if self.continue_on_failure:
-                        log.exception(e, "Error loading %s", url)
+                        log.exception(f"Error loading {url}: {e}")
                         continue
                     raise e
             browser.close()
@@ -342,7 +466,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader):
                 try:
                     await self._safe_process_url(url)
                     page = await browser.new_page()
-                    response = await page.goto(url)
+                    response = await page.goto(url, timeout=self.playwright_timeout)
                     if response is None:
                         raise ValueError(f"page.goto() returned None for url {url}")
 
@@ -351,46 +475,11 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader):
                     yield Document(page_content=text, metadata=metadata)
                 except Exception as e:
                     if self.continue_on_failure:
-                        log.exception(e, "Error loading %s", url)
+                        log.exception(f"Error loading {url}: {e}")
                         continue
                     raise e
             await browser.close()
 
-    def _verify_ssl_cert(self, url: str) -> bool:
-        return verify_ssl_cert(url)
-
-    async def _wait_for_rate_limit(self):
-        """Wait to respect the rate limit if specified."""
-        if self.requests_per_second and self.last_request_time:
-            min_interval = timedelta(seconds=1.0 / self.requests_per_second)
-            time_since_last = datetime.now() - self.last_request_time
-            if time_since_last < min_interval:
-                await asyncio.sleep((min_interval - time_since_last).total_seconds())
-        self.last_request_time = datetime.now()
-
-    def _sync_wait_for_rate_limit(self):
-        """Synchronous version of rate limit wait."""
-        if self.requests_per_second and self.last_request_time:
-            min_interval = timedelta(seconds=1.0 / self.requests_per_second)
-            time_since_last = datetime.now() - self.last_request_time
-            if time_since_last < min_interval:
-                time.sleep((min_interval - time_since_last).total_seconds())
-        self.last_request_time = datetime.now()
-
-    async def _safe_process_url(self, url: str) -> bool:
-        """Perform safety checks before processing a URL."""
-        if self.verify_ssl and not self._verify_ssl_cert(url):
-            raise ValueError(f"SSL certificate verification failed for {url}")
-        await self._wait_for_rate_limit()
-        return True
-
-    def _safe_process_url_sync(self, url: str) -> bool:
-        """Synchronous version of safety checks."""
-        if self.verify_ssl and not self._verify_ssl_cert(url):
-            raise ValueError(f"SSL certificate verification failed for {url}")
-        self._sync_wait_for_rate_limit()
-        return True
-
 
 class SafeWebBaseLoader(WebBaseLoader):
     """WebBaseLoader with enhanced error handling for URLs."""
@@ -472,7 +561,7 @@ class SafeWebBaseLoader(WebBaseLoader):
                 yield Document(page_content=text, metadata=metadata)
             except Exception as e:
                 # Log the error and continue with the next URL
-                log.exception(e, "Error loading %s", path)
+                log.exception(f"Error loading {path}: {e}")
 
     async def alazy_load(self) -> AsyncIterator[Document]:
         """Async lazy load text from the url(s) in web_path."""
@@ -499,6 +588,7 @@ RAG_WEB_LOADER_ENGINES = defaultdict(lambda: SafeWebBaseLoader)
 RAG_WEB_LOADER_ENGINES["playwright"] = SafePlaywrightURLLoader
 RAG_WEB_LOADER_ENGINES["safe_web"] = SafeWebBaseLoader
 RAG_WEB_LOADER_ENGINES["firecrawl"] = SafeFireCrawlLoader
+RAG_WEB_LOADER_ENGINES["tavily"] = SafeTavilyLoader
 
 
 def get_web_loader(
@@ -518,13 +608,19 @@ def get_web_loader(
         "trust_env": trust_env,
     }
 
-    if PLAYWRIGHT_WS_URI.value:
-        web_loader_args["playwright_ws_url"] = PLAYWRIGHT_WS_URI.value
+    if RAG_WEB_LOADER_ENGINE.value == "playwright":
+        web_loader_args["playwright_timeout"] = PLAYWRIGHT_TIMEOUT.value * 1000
+        if PLAYWRIGHT_WS_URI.value:
+            web_loader_args["playwright_ws_url"] = PLAYWRIGHT_WS_URI.value
 
     if RAG_WEB_LOADER_ENGINE.value == "firecrawl":
         web_loader_args["api_key"] = FIRECRAWL_API_KEY.value
         web_loader_args["api_url"] = FIRECRAWL_API_BASE_URL.value
 
+    if RAG_WEB_LOADER_ENGINE.value == "tavily":
+        web_loader_args["api_key"] = TAVILY_API_KEY.value
+        web_loader_args["extract_depth"] = TAVILY_EXTRACT_DEPTH.value
+
     # Create the appropriate WebLoader based on the configuration
     WebLoaderClass = RAG_WEB_LOADER_ENGINES[RAG_WEB_LOADER_ENGINE.value]
     web_loader = WebLoaderClass(**web_loader_args)

+ 3 - 1
backend/open_webui/routers/audio.py

@@ -625,7 +625,9 @@ def transcription(
 ):
     log.info(f"file.content_type: {file.content_type}")
 
-    if file.content_type not in ["audio/mpeg", "audio/wav", "audio/ogg", "audio/x-m4a"]:
+    supported_filetypes = ("audio/mpeg", "audio/wav", "audio/ogg", "audio/x-m4a")
+
+    if not file.content_type.startswith(supported_filetypes):
         raise HTTPException(
             status_code=status.HTTP_400_BAD_REQUEST,
             detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED,

+ 8 - 3
backend/open_webui/routers/auths.py

@@ -210,7 +210,7 @@ async def ldap_auth(request: Request, response: Response, form_data: LdapForm):
             LDAP_APP_DN,
             LDAP_APP_PASSWORD,
             auto_bind="NONE",
-            authentication="SIMPLE",
+            authentication="SIMPLE" if LDAP_APP_DN else "ANONYMOUS",
         )
         if not connection_app.bind():
             raise HTTPException(400, detail="Application account bind failed")
@@ -639,11 +639,12 @@ async def get_admin_config(request: Request, user=Depends(get_admin_user)):
         "ENABLE_API_KEY": request.app.state.config.ENABLE_API_KEY,
         "ENABLE_API_KEY_ENDPOINT_RESTRICTIONS": request.app.state.config.ENABLE_API_KEY_ENDPOINT_RESTRICTIONS,
         "API_KEY_ALLOWED_ENDPOINTS": request.app.state.config.API_KEY_ALLOWED_ENDPOINTS,
-        "ENABLE_CHANNELS": request.app.state.config.ENABLE_CHANNELS,
         "DEFAULT_USER_ROLE": request.app.state.config.DEFAULT_USER_ROLE,
         "JWT_EXPIRES_IN": request.app.state.config.JWT_EXPIRES_IN,
         "ENABLE_COMMUNITY_SHARING": request.app.state.config.ENABLE_COMMUNITY_SHARING,
         "ENABLE_MESSAGE_RATING": request.app.state.config.ENABLE_MESSAGE_RATING,
+        "ENABLE_CHANNELS": request.app.state.config.ENABLE_CHANNELS,
+        "ENABLE_USER_WEBHOOKS": request.app.state.config.ENABLE_USER_WEBHOOKS,
     }
 
 
@@ -654,11 +655,12 @@ class AdminConfig(BaseModel):
     ENABLE_API_KEY: bool
     ENABLE_API_KEY_ENDPOINT_RESTRICTIONS: bool
     API_KEY_ALLOWED_ENDPOINTS: str
-    ENABLE_CHANNELS: bool
     DEFAULT_USER_ROLE: str
     JWT_EXPIRES_IN: str
     ENABLE_COMMUNITY_SHARING: bool
     ENABLE_MESSAGE_RATING: bool
+    ENABLE_CHANNELS: bool
+    ENABLE_USER_WEBHOOKS: bool
 
 
 @router.post("/admin/config")
@@ -693,6 +695,8 @@ async def update_admin_config(
     )
     request.app.state.config.ENABLE_MESSAGE_RATING = form_data.ENABLE_MESSAGE_RATING
 
+    request.app.state.config.ENABLE_USER_WEBHOOKS = form_data.ENABLE_USER_WEBHOOKS
+
     return {
         "SHOW_ADMIN_DETAILS": request.app.state.config.SHOW_ADMIN_DETAILS,
         "WEBUI_URL": request.app.state.config.WEBUI_URL,
@@ -705,6 +709,7 @@ async def update_admin_config(
         "JWT_EXPIRES_IN": request.app.state.config.JWT_EXPIRES_IN,
         "ENABLE_COMMUNITY_SHARING": request.app.state.config.ENABLE_COMMUNITY_SHARING,
         "ENABLE_MESSAGE_RATING": request.app.state.config.ENABLE_MESSAGE_RATING,
+        "ENABLE_USER_WEBHOOKS": request.app.state.config.ENABLE_USER_WEBHOOKS,
     }
 
 

+ 103 - 0
backend/open_webui/routers/chats.py

@@ -2,6 +2,8 @@ import json
 import logging
 from typing import Optional
 
+
+from open_webui.socket.main import get_event_emitter
 from open_webui.models.chats import (
     ChatForm,
     ChatImportForm,
@@ -372,6 +374,107 @@ async def update_chat_by_id(
         )
 
 
+############################
+# UpdateChatMessageById
+############################
+class MessageForm(BaseModel):
+    # Request body carrying the text to store as a chat message's content.
+    content: str
+
+
+@router.post("/{id}/messages/{message_id}", response_model=Optional[ChatResponse])
+async def update_chat_message_by_id(
+    id: str, message_id: str, form_data: MessageForm, user=Depends(get_verified_user)
+):
+    # Store new content for one message of a chat, then push a "chat:message"
+    # event through the event emitter (when one is available) and return the
+    # chat produced by the upsert.
+    existing = Chats.get_chat_by_id(id)
+
+    # A missing chat and a chat that belongs to someone else (for a non-admin
+    # caller) are both rejected with the same 401 response.
+    permitted = bool(existing) and (
+        existing.user_id == user.id or user.role == "admin"
+    )
+    if not permitted:
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail=ERROR_MESSAGES.ACCESS_PROHIBITED,
+        )
+
+    new_content = form_data.content
+    updated = Chats.upsert_message_to_chat_by_id_and_message_id(
+        id, message_id, {"content": new_content}
+    )
+
+    emit = get_event_emitter(
+        {"user_id": user.id, "chat_id": id, "message_id": message_id},
+        False,
+    )
+    if emit:
+        payload = {
+            "chat_id": id,
+            "message_id": message_id,
+            "content": new_content,
+        }
+        await emit({"type": "chat:message", "data": payload})
+
+    # NOTE(review): assumes the upsert never returns None; verify against
+    # Chats.upsert_message_to_chat_by_id_and_message_id.
+    return ChatResponse(**updated.model_dump())
+
+
+############################
+# SendChatMessageEventById
+############################
+class EventForm(BaseModel):
+    # Request body describing an event to emit: a type tag plus a free-form
+    # data payload.
+    type: str
+    data: dict
+
+
+@router.post("/{id}/messages/{message_id}/event", response_model=Optional[bool])
+async def send_chat_message_event_by_id(
+    id: str, message_id: str, form_data: EventForm, user=Depends(get_verified_user)
+):
+    # Emit a caller-supplied event for one message of a chat.
+    # Returns True when the event was handed to the emitter, False when no
+    # emitter is available or emitting failed. Raises 401 when the chat does
+    # not exist or the caller is neither its owner nor an admin.
+    chat = Chats.get_chat_by_id(id)
+
+    if not chat:
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail=ERROR_MESSAGES.ACCESS_PROHIBITED,
+        )
+
+    if chat.user_id != user.id and user.role != "admin":
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail=ERROR_MESSAGES.ACCESS_PROHIBITED,
+        )
+
+    event_emitter = get_event_emitter(
+        {
+            "user_id": user.id,
+            "chat_id": id,
+            "message_id": message_id,
+        }
+    )
+
+    if not event_emitter:
+        return False
+
+    try:
+        await event_emitter(form_data.model_dump())
+    except Exception:
+        # Catch Exception rather than using a bare except so that task
+        # cancellation and interpreter-exit signals (BaseException subclasses)
+        # still propagate; record the failure instead of hiding it.
+        logging.getLogger(__name__).exception(
+            "Failed to emit chat message event"
+        )
+        return False
+
+    return True
+
+
 ############################
 # DeleteChatById
 ############################

+ 16 - 2
backend/open_webui/routers/evaluations.py

@@ -56,8 +56,19 @@ async def update_config(
     }
 
 
+class FeedbackUserReponse(BaseModel):
+    # Reduced user record embedded in feedback listings: identity, role and
+    # activity timestamps only (presumably so the full UserModel is not
+    # returned alongside each feedback entry; confirm intent).
+    # NOTE(review): the name is spelled "Reponse" (missing "s"); it is
+    # referenced under this spelling elsewhere, so renaming must be done
+    # everywhere at once.
+    id: str
+    name: str
+    email: str
+    role: str = "pending"
+
+    last_active_at: int  # timestamp in epoch
+    updated_at: int  # timestamp in epoch
+    created_at: int  # timestamp in epoch
+
+
 class FeedbackUserResponse(FeedbackResponse):
-    user: Optional[UserModel] = None
+    user: Optional[FeedbackUserReponse] = None
 
 
 @router.get("/feedbacks/all", response_model=list[FeedbackUserResponse])
@@ -65,7 +76,10 @@ async def get_all_feedbacks(user=Depends(get_admin_user)):
     feedbacks = Feedbacks.get_all_feedbacks()
     return [
         FeedbackUserResponse(
-            **feedback.model_dump(), user=Users.get_user_by_id(feedback.user_id)
+            **feedback.model_dump(),
+            user=FeedbackUserReponse(
+                **Users.get_user_by_id(feedback.user_id).model_dump()
+            ),
         )
         for feedback in feedbacks
     ]

+ 169 - 44
backend/open_webui/routers/files.py

@@ -5,7 +5,16 @@ from pathlib import Path
 from typing import Optional
 from urllib.parse import quote
 
-from fastapi import APIRouter, Depends, File, HTTPException, Request, UploadFile, status
+from fastapi import (
+    APIRouter,
+    Depends,
+    File,
+    HTTPException,
+    Request,
+    UploadFile,
+    status,
+    Query,
+)
 from fastapi.responses import FileResponse, StreamingResponse
 from open_webui.constants import ERROR_MESSAGES
 from open_webui.env import SRC_LOG_LEVELS
@@ -15,6 +24,9 @@ from open_webui.models.files import (
     FileModelResponse,
     Files,
 )
+from open_webui.models.knowledge import Knowledges
+
+from open_webui.routers.knowledge import get_knowledge, get_knowledge_list
 from open_webui.routers.retrieval import ProcessFileForm, process_file
 from open_webui.routers.audio import transcribe
 from open_webui.storage.provider import Storage
@@ -27,6 +39,39 @@ log.setLevel(SRC_LOG_LEVELS["MODELS"])
 
 router = APIRouter()
 
+
+############################
+# Check if the current user has access to a file through any knowledge bases the user may be in.
+############################
+
+
+def has_access_to_file(
+    file_id: Optional[str], access_type: str, user=Depends(get_verified_user)
+) -> bool:
+    """Report whether `user` can reach a file via one of their knowledge bases.
+
+    The file's `meta["collection_name"]` is treated as a knowledge base id and
+    compared against the knowledge bases returned for the user with the given
+    `access_type`. Raises a 404 HTTPException when the file does not exist.
+
+    NOTE(review): the `Depends(...)` default is only resolved by FastAPI for
+    route handlers; the visible callers pass `user` explicitly.
+    """
+    target = Files.get_file_by_id(file_id)
+    log.debug(f"Checking if user has {access_type} access to file")
+
+    if not target:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=ERROR_MESSAGES.NOT_FOUND,
+        )
+
+    collection_id = target.meta.get("collection_name") if target.meta else None
+    if not collection_id:
+        # The file is not tied to any knowledge base, so there is nothing to match.
+        return False
+
+    return any(
+        kb.id == collection_id
+        for kb in Knowledges.get_knowledge_bases_by_user_id(user.id, access_type)
+    )
+
+
 ############################
 # Upload File
 ############################
@@ -38,6 +83,7 @@ def upload_file(
     file: UploadFile = File(...),
     user=Depends(get_verified_user),
     file_metadata: dict = {},
+    process: bool = Query(True),
 ):
     log.info(f"file.content_type: {file.content_type}")
     try:
@@ -66,34 +112,33 @@ def upload_file(
                 }
             ),
         )
-
-        try:
-            if file.content_type in [
-                "audio/mpeg",
-                "audio/wav",
-                "audio/ogg",
-                "audio/x-m4a",
-            ]:
-                file_path = Storage.get_file(file_path)
-                result = transcribe(request, file_path)
-                process_file(
-                    request,
-                    ProcessFileForm(file_id=id, content=result.get("text", "")),
-                    user=user,
+        if process:
+            try:
+                if file.content_type in [
+                    "audio/mpeg",
+                    "audio/wav",
+                    "audio/ogg",
+                    "audio/x-m4a",
+                ]:
+                    file_path = Storage.get_file(file_path)
+                    result = transcribe(request, file_path)
+                    process_file(
+                        request,
+                        ProcessFileForm(file_id=id, content=result.get("text", "")),
+                        user=user,
+                    )
+                elif file.content_type not in ["image/png", "image/jpeg", "image/gif"]:
+                    process_file(request, ProcessFileForm(file_id=id), user=user)
+                    file_item = Files.get_file_by_id(id=id)
+            except Exception as e:
+                log.exception(e)
+                log.error(f"Error processing file: {file_item.id}")
+                file_item = FileModelResponse(
+                    **{
+                        **file_item.model_dump(),
+                        "error": str(e.detail) if hasattr(e, "detail") else str(e),
+                    }
                 )
-            else:
-                process_file(request, ProcessFileForm(file_id=id), user=user)
-
-            file_item = Files.get_file_by_id(id=id)
-        except Exception as e:
-            log.exception(e)
-            log.error(f"Error processing file: {file_item.id}")
-            file_item = FileModelResponse(
-                **{
-                    **file_item.model_dump(),
-                    "error": str(e.detail) if hasattr(e, "detail") else str(e),
-                }
-            )
 
         if file_item:
             return file_item
@@ -160,7 +205,17 @@ async def delete_all_files(user=Depends(get_admin_user)):
 async def get_file_by_id(id: str, user=Depends(get_verified_user)):
     file = Files.get_file_by_id(id)
 
-    if file and (file.user_id == user.id or user.role == "admin"):
+    if not file:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=ERROR_MESSAGES.NOT_FOUND,
+        )
+
+    if (
+        file.user_id == user.id
+        or user.role == "admin"
+        or has_access_to_file(id, "read", user)
+    ):
         return file
     else:
         raise HTTPException(
@@ -178,7 +233,17 @@ async def get_file_by_id(id: str, user=Depends(get_verified_user)):
 async def get_file_data_content_by_id(id: str, user=Depends(get_verified_user)):
     file = Files.get_file_by_id(id)
 
-    if file and (file.user_id == user.id or user.role == "admin"):
+    if not file:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=ERROR_MESSAGES.NOT_FOUND,
+        )
+
+    if (
+        file.user_id == user.id
+        or user.role == "admin"
+        or has_access_to_file(id, "read", user)
+    ):
         return {"content": file.data.get("content", "")}
     else:
         raise HTTPException(
@@ -202,7 +267,17 @@ async def update_file_data_content_by_id(
 ):
     file = Files.get_file_by_id(id)
 
-    if file and (file.user_id == user.id or user.role == "admin"):
+    if not file:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=ERROR_MESSAGES.NOT_FOUND,
+        )
+
+    if (
+        file.user_id == user.id
+        or user.role == "admin"
+        or has_access_to_file(id, "write", user)
+    ):
         try:
             process_file(
                 request,
@@ -228,9 +303,22 @@ async def update_file_data_content_by_id(
 
 
 @router.get("/{id}/content")
-async def get_file_content_by_id(id: str, user=Depends(get_verified_user)):
+async def get_file_content_by_id(
+    id: str, user=Depends(get_verified_user), attachment: bool = Query(False)
+):
     file = Files.get_file_by_id(id)
-    if file and (file.user_id == user.id or user.role == "admin"):
+
+    if not file:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=ERROR_MESSAGES.NOT_FOUND,
+        )
+
+    if (
+        file.user_id == user.id
+        or user.role == "admin"
+        or has_access_to_file(id, "read", user)
+    ):
         try:
             file_path = Storage.get_file(file.path)
             file_path = Path(file_path)
@@ -246,17 +334,22 @@ async def get_file_content_by_id(id: str, user=Depends(get_verified_user)):
                 encoded_filename = quote(filename)
                 headers = {}
 
-                if content_type == "application/pdf" or filename.lower().endswith(
-                    ".pdf"
-                ):
-                    headers["Content-Disposition"] = (
-                        f"inline; filename*=UTF-8''{encoded_filename}"
-                    )
-                    content_type = "application/pdf"
-                elif content_type != "text/plain":
+                if attachment:
                     headers["Content-Disposition"] = (
                         f"attachment; filename*=UTF-8''{encoded_filename}"
                     )
+                else:
+                    if content_type == "application/pdf" or filename.lower().endswith(
+                        ".pdf"
+                    ):
+                        headers["Content-Disposition"] = (
+                            f"inline; filename*=UTF-8''{encoded_filename}"
+                        )
+                        content_type = "application/pdf"
+                    elif content_type != "text/plain":
+                        headers["Content-Disposition"] = (
+                            f"attachment; filename*=UTF-8''{encoded_filename}"
+                        )
 
                 return FileResponse(file_path, headers=headers, media_type=content_type)
 
@@ -282,7 +375,18 @@ async def get_file_content_by_id(id: str, user=Depends(get_verified_user)):
 @router.get("/{id}/content/html")
 async def get_html_file_content_by_id(id: str, user=Depends(get_verified_user)):
     file = Files.get_file_by_id(id)
-    if file and (file.user_id == user.id or user.role == "admin"):
+
+    if not file:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=ERROR_MESSAGES.NOT_FOUND,
+        )
+
+    if (
+        file.user_id == user.id
+        or user.role == "admin"
+        or has_access_to_file(id, "read", user)
+    ):
         try:
             file_path = Storage.get_file(file.path)
             file_path = Path(file_path)
@@ -314,7 +418,17 @@ async def get_html_file_content_by_id(id: str, user=Depends(get_verified_user)):
 async def get_file_content_by_id(id: str, user=Depends(get_verified_user)):
     file = Files.get_file_by_id(id)
 
-    if file and (file.user_id == user.id or user.role == "admin"):
+    if not file:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=ERROR_MESSAGES.NOT_FOUND,
+        )
+
+    if (
+        file.user_id == user.id
+        or user.role == "admin"
+        or has_access_to_file(id, "read", user)
+    ):
         file_path = file.path
 
         # Handle Unicode filenames
@@ -365,7 +479,18 @@ async def get_file_content_by_id(id: str, user=Depends(get_verified_user)):
 @router.delete("/{id}")
 async def delete_file_by_id(id: str, user=Depends(get_verified_user)):
     file = Files.get_file_by_id(id)
-    if file and (file.user_id == user.id or user.role == "admin"):
+
+    if not file:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=ERROR_MESSAGES.NOT_FOUND,
+        )
+
+    if (
+        file.user_id == user.id
+        or user.role == "admin"
+        or has_access_to_file(id, "write", user)
+    ):
         # We should add Chroma cleanup here
 
         result = Files.delete_file_by_id(id)

+ 15 - 2
backend/open_webui/routers/folders.py

@@ -20,11 +20,13 @@ from open_webui.env import SRC_LOG_LEVELS
 from open_webui.constants import ERROR_MESSAGES
 
 
-from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status
+from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status, Request
 from fastapi.responses import FileResponse, StreamingResponse
 
 
 from open_webui.utils.auth import get_admin_user, get_verified_user
+from open_webui.utils.access_control import has_permission
+
 
 log = logging.getLogger(__name__)
 log.setLevel(SRC_LOG_LEVELS["MODELS"])
@@ -228,7 +230,18 @@ async def update_folder_is_expanded_by_id(
 
 
 @router.delete("/{id}")
-async def delete_folder_by_id(id: str, user=Depends(get_verified_user)):
+async def delete_folder_by_id(
+    request: Request, id: str, user=Depends(get_verified_user)
+):
+    chat_delete_permission = has_permission(
+        user.id, "chat.delete", request.app.state.config.USER_PERMISSIONS
+    )
+    if not chat_delete_permission:
+        raise HTTPException(
+            status_code=status.HTTP_403_FORBIDDEN,
+            detail=ERROR_MESSAGES.ACCESS_PROHIBITED,
+        )
+
     folder = Folders.get_folder_by_id_and_user_id(id, user.id)
     if folder:
         try:

+ 2 - 4
backend/open_webui/routers/images.py

@@ -517,10 +517,8 @@ async def image_generations(
             images = []
 
             for image in res["data"]:
-                if "url" in image:
-                    image_data, content_type = load_url_image_data(
-                        image["url"], headers
-                    )
+                if image_url := image.get("url", None):
+                    image_data, content_type = load_url_image_data(image_url, headers)
                 else:
                     image_data, content_type = load_b64_image_data(image["b64_json"])
 

+ 17 - 7
backend/open_webui/routers/knowledge.py

@@ -437,14 +437,24 @@ def remove_file_from_knowledge_by_id(
         )
 
     # Remove content from the vector database
-    VECTOR_DB_CLIENT.delete(
-        collection_name=knowledge.id, filter={"file_id": form_data.file_id}
-    )
+    try:
+        VECTOR_DB_CLIENT.delete(
+            collection_name=knowledge.id, filter={"file_id": form_data.file_id}
+        )
+    except Exception as e:
+        log.debug("This was most likely caused by bypassing embedding processing")
+        log.debug(e)
+        pass
 
-    # Remove the file's collection from vector database
-    file_collection = f"file-{form_data.file_id}"
-    if VECTOR_DB_CLIENT.has_collection(collection_name=file_collection):
-        VECTOR_DB_CLIENT.delete_collection(collection_name=file_collection)
+    try:
+        # Remove the file's collection from vector database
+        file_collection = f"file-{form_data.file_id}"
+        if VECTOR_DB_CLIENT.has_collection(collection_name=file_collection):
+            VECTOR_DB_CLIENT.delete_collection(collection_name=file_collection)
+    except Exception as e:
+        log.debug("This was most likely caused by bypassing embedding processing")
+        log.debug(e)
+        pass
 
     # Delete file from database
     Files.delete_file_by_id(form_data.file_id)

+ 8 - 4
backend/open_webui/routers/memories.py

@@ -57,7 +57,9 @@ async def add_memory(
             {
                 "id": memory.id,
                 "text": memory.content,
-                "vector": request.app.state.EMBEDDING_FUNCTION(memory.content, user),
+                "vector": request.app.state.EMBEDDING_FUNCTION(
+                    memory.content, user=user
+                ),
                 "metadata": {"created_at": memory.created_at},
             }
         ],
@@ -82,7 +84,7 @@ async def query_memory(
 ):
     results = VECTOR_DB_CLIENT.search(
         collection_name=f"user-memory-{user.id}",
-        vectors=[request.app.state.EMBEDDING_FUNCTION(form_data.content, user)],
+        vectors=[request.app.state.EMBEDDING_FUNCTION(form_data.content, user=user)],
         limit=form_data.k,
     )
 
@@ -105,7 +107,9 @@ async def reset_memory_from_vector_db(
             {
                 "id": memory.id,
                 "text": memory.content,
-                "vector": request.app.state.EMBEDDING_FUNCTION(memory.content, user),
+                "vector": request.app.state.EMBEDDING_FUNCTION(
+                    memory.content, user=user
+                ),
                 "metadata": {
                     "created_at": memory.created_at,
                     "updated_at": memory.updated_at,
@@ -161,7 +165,7 @@ async def update_memory_by_id(
                     "id": memory.id,
                     "text": memory.content,
                     "vector": request.app.state.EMBEDDING_FUNCTION(
-                        memory.content, user
+                        memory.content, user=user
                     ),
                     "metadata": {
                         "created_at": memory.created_at,

+ 26 - 12
backend/open_webui/routers/ollama.py

@@ -295,7 +295,7 @@ async def update_config(
     }
 
 
-@cached(ttl=3)
+@cached(ttl=1)
 async def get_all_models(request: Request, user: UserModel = None):
     log.info("get_all_models()")
     if request.app.state.config.ENABLE_OLLAMA_API:
@@ -336,6 +336,7 @@ async def get_all_models(request: Request, user: UserModel = None):
                 )
 
                 prefix_id = api_config.get("prefix_id", None)
+                tags = api_config.get("tags", [])
                 model_ids = api_config.get("model_ids", [])
 
                 if len(model_ids) != 0 and "models" in response:
@@ -350,6 +351,10 @@ async def get_all_models(request: Request, user: UserModel = None):
                     for model in response.get("models", []):
                         model["model"] = f"{prefix_id}.{model['model']}"
 
+                if tags:
+                    for model in response.get("models", []):
+                        model["tags"] = tags
+
         def merge_models_lists(model_lists):
             merged_models = {}
 
@@ -460,18 +465,27 @@ async def get_ollama_versions(request: Request, url_idx: Optional[int] = None):
     if request.app.state.config.ENABLE_OLLAMA_API:
         if url_idx is None:
             # returns lowest version
-            request_tasks = [
-                send_get_request(
-                    f"{url}/api/version",
+            request_tasks = []
+
+            for idx, url in enumerate(request.app.state.config.OLLAMA_BASE_URLS):
+                api_config = request.app.state.config.OLLAMA_API_CONFIGS.get(
+                    str(idx),
                     request.app.state.config.OLLAMA_API_CONFIGS.get(
-                        str(idx),
-                        request.app.state.config.OLLAMA_API_CONFIGS.get(
-                            url, {}
-                        ),  # Legacy support
-                    ).get("key", None),
+                        url, {}
+                    ),  # Legacy support
                 )
-                for idx, url in enumerate(request.app.state.config.OLLAMA_BASE_URLS)
-            ]
+
+                enable = api_config.get("enable", True)
+                key = api_config.get("key", None)
+
+                if enable:
+                    request_tasks.append(
+                        send_get_request(
+                            f"{url}/api/version",
+                            key,
+                        )
+                    )
+
             responses = await asyncio.gather(*request_tasks)
             responses = list(filter(lambda x: x is not None, responses))
 
@@ -1164,7 +1178,7 @@ async def generate_chat_completion(
     prefix_id = api_config.get("prefix_id", None)
     if prefix_id:
         payload["model"] = payload["model"].replace(f"{prefix_id}.", "")
-
+    # payload["keep_alive"] = -1 # keep alive forever
     return await send_post_request(
         url=f"{url}/api/chat",
         payload=json.dumps(payload),

+ 32 - 13
backend/open_webui/routers/openai.py

@@ -36,6 +36,9 @@ from open_webui.utils.payload import (
     apply_model_params_to_body_openai,
     apply_model_system_prompt_to_body,
 )
+from open_webui.utils.misc import (
+    convert_logit_bias_input_to_json,
+)
 
 from open_webui.utils.auth import get_admin_user, get_verified_user
 from open_webui.utils.access_control import has_access
@@ -350,6 +353,7 @@ async def get_all_models_responses(request: Request, user: UserModel) -> list:
             )
 
             prefix_id = api_config.get("prefix_id", None)
+            tags = api_config.get("tags", [])
 
             if prefix_id:
                 for model in (
@@ -357,6 +361,12 @@ async def get_all_models_responses(request: Request, user: UserModel) -> list:
                 ):
                     model["id"] = f"{prefix_id}.{model['id']}"
 
+            if tags:
+                for model in (
+                    response if isinstance(response, list) else response.get("data", [])
+                ):
+                    model["tags"] = tags
+
     log.debug(f"get_all_models:responses() {responses}")
     return responses
 
@@ -374,7 +384,7 @@ async def get_filtered_models(models, user):
     return filtered_models
 
 
-@cached(ttl=3)
+@cached(ttl=1)
 async def get_all_models(request: Request, user: UserModel) -> dict[str, list]:
     log.info("get_all_models()")
 
@@ -396,6 +406,7 @@ async def get_all_models(request: Request, user: UserModel) -> dict[str, list]:
 
         for idx, models in enumerate(model_lists):
             if models is not None and "error" not in models:
+
                 merged_list.extend(
                     [
                         {
@@ -406,18 +417,21 @@ async def get_all_models(request: Request, user: UserModel) -> dict[str, list]:
                             "urlIdx": idx,
                         }
                         for model in models
-                        if "api.openai.com"
-                        not in request.app.state.config.OPENAI_API_BASE_URLS[idx]
-                        or not any(
-                            name in model["id"]
-                            for name in [
-                                "babbage",
-                                "dall-e",
-                                "davinci",
-                                "embedding",
-                                "tts",
-                                "whisper",
-                            ]
+                        if (model.get("id") or model.get("name"))
+                        and (
+                            "api.openai.com"
+                            not in request.app.state.config.OPENAI_API_BASE_URLS[idx]
+                            or not any(
+                                name in model["id"]
+                                for name in [
+                                    "babbage",
+                                    "dall-e",
+                                    "davinci",
+                                    "embedding",
+                                    "tts",
+                                    "whisper",
+                                ]
+                            )
                         )
                     ]
                 )
@@ -666,6 +680,11 @@ async def generate_chat_completion(
         del payload["max_tokens"]
 
     # Convert the modified body back to JSON
+    if "logit_bias" in payload:
+        payload["logit_bias"] = json.loads(
+            convert_logit_bias_input_to_json(payload["logit_bias"])
+        )
+
     payload = json.dumps(payload)
 
     r = None

+ 2 - 2
backend/open_webui/routers/pipelines.py

@@ -90,8 +90,8 @@ async def process_pipeline_inlet_filter(request, payload, user, models):
                     headers=headers,
                     json=request_data,
                 ) as response:
-                    response.raise_for_status()
                     payload = await response.json()
+                    response.raise_for_status()
             except aiohttp.ClientResponseError as e:
                 res = (
                     await response.json()
@@ -139,8 +139,8 @@ async def process_pipeline_outlet_filter(request, payload, user, models):
                     headers=headers,
                     json=request_data,
                 ) as response:
-                    response.raise_for_status()
                     payload = await response.json()
+                    response.raise_for_status()
             except aiohttp.ClientResponseError as e:
                 try:
                     res = (

+ 34 - 10
backend/open_webui/routers/retrieval.py

@@ -74,7 +74,6 @@ from open_webui.utils.misc import (
 )
 from open_webui.utils.auth import get_admin_user, get_verified_user
 
-
 from open_webui.config import (
     ENV,
     RAG_EMBEDDING_MODEL_AUTO_UPDATE,
@@ -83,6 +82,8 @@ from open_webui.config import (
     RAG_RERANKING_MODEL_TRUST_REMOTE_CODE,
     UPLOAD_DIR,
     DEFAULT_LOCALE,
+    RAG_EMBEDDING_CONTENT_PREFIX,
+    RAG_EMBEDDING_QUERY_PREFIX,
 )
 from open_webui.env import (
     SRC_LOG_LEVELS,
@@ -358,6 +359,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
         "content_extraction": {
             "engine": request.app.state.config.CONTENT_EXTRACTION_ENGINE,
             "tika_server_url": request.app.state.config.TIKA_SERVER_URL,
+            "docling_server_url": request.app.state.config.DOCLING_SERVER_URL,
             "document_intelligence_config": {
                 "endpoint": request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT,
                 "key": request.app.state.config.DOCUMENT_INTELLIGENCE_KEY,
@@ -428,6 +430,7 @@ class DocumentIntelligenceConfigForm(BaseModel):
 class ContentExtractionConfig(BaseModel):
     engine: str = ""
     tika_server_url: Optional[str] = None
+    docling_server_url: Optional[str] = None
     document_intelligence_config: Optional[DocumentIntelligenceConfigForm] = None
 
 
@@ -540,6 +543,9 @@ async def update_rag_config(
         request.app.state.config.TIKA_SERVER_URL = (
             form_data.content_extraction.tika_server_url
         )
+        request.app.state.config.DOCLING_SERVER_URL = (
+            form_data.content_extraction.docling_server_url
+        )
         if form_data.content_extraction.document_intelligence_config is not None:
             request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT = (
                 form_data.content_extraction.document_intelligence_config.endpoint
@@ -648,6 +654,7 @@ async def update_rag_config(
         "content_extraction": {
             "engine": request.app.state.config.CONTENT_EXTRACTION_ENGINE,
             "tika_server_url": request.app.state.config.TIKA_SERVER_URL,
+            "docling_server_url": request.app.state.config.DOCLING_SERVER_URL,
             "document_intelligence_config": {
                 "endpoint": request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT,
                 "key": request.app.state.config.DOCUMENT_INTELLIGENCE_KEY,
@@ -713,6 +720,7 @@ async def get_query_settings(request: Request, user=Depends(get_admin_user)):
         "status": True,
         "template": request.app.state.config.RAG_TEMPLATE,
         "k": request.app.state.config.TOP_K,
+        "k_reranker": request.app.state.config.TOP_K_RERANKER,
         "r": request.app.state.config.RELEVANCE_THRESHOLD,
         "hybrid": request.app.state.config.ENABLE_RAG_HYBRID_SEARCH,
     }
@@ -720,6 +728,7 @@ async def get_query_settings(request: Request, user=Depends(get_admin_user)):
 
 class QuerySettingsForm(BaseModel):
+    # Request body for updating the query settings. All fields are optional;
+    # update_query_settings falls back to 4 for a falsy `k` / `k_reranker`
+    # and to 0.0 for a falsy `r`.
     k: Optional[int] = None
+    k_reranker: Optional[int] = None
     r: Optional[float] = None
     template: Optional[str] = None
     hybrid: Optional[bool] = None
@@ -731,6 +740,7 @@ async def update_query_settings(
 ):
     request.app.state.config.RAG_TEMPLATE = form_data.template
     request.app.state.config.TOP_K = form_data.k if form_data.k else 4
+    request.app.state.config.TOP_K_RERANKER = form_data.k_reranker or 4
     request.app.state.config.RELEVANCE_THRESHOLD = form_data.r if form_data.r else 0.0
 
     request.app.state.config.ENABLE_RAG_HYBRID_SEARCH = (
@@ -741,6 +751,7 @@ async def update_query_settings(
         "status": True,
         "template": request.app.state.config.RAG_TEMPLATE,
         "k": request.app.state.config.TOP_K,
+        "k_reranker": request.app.state.config.TOP_K_RERANKER,
         "r": request.app.state.config.RELEVANCE_THRESHOLD,
         "hybrid": request.app.state.config.ENABLE_RAG_HYBRID_SEARCH,
     }
@@ -881,7 +892,9 @@ def save_docs_to_vector_db(
         )
 
         embeddings = embedding_function(
-            list(map(lambda x: x.replace("\n", " "), texts)), user=user
+            list(map(lambda x: x.replace("\n", " "), texts)),
+            prefix=RAG_EMBEDDING_CONTENT_PREFIX,
+            user=user,
         )
 
         items = [
@@ -990,6 +1003,7 @@ def process_file(
                 loader = Loader(
                     engine=request.app.state.config.CONTENT_EXTRACTION_ENGINE,
                     TIKA_SERVER_URL=request.app.state.config.TIKA_SERVER_URL,
+                    DOCLING_SERVER_URL=request.app.state.config.DOCLING_SERVER_URL,
                     PDF_EXTRACT_IMAGES=request.app.state.config.PDF_EXTRACT_IMAGES,
                     DOCUMENT_INTELLIGENCE_ENDPOINT=request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT,
                     DOCUMENT_INTELLIGENCE_KEY=request.app.state.config.DOCUMENT_INTELLIGENCE_KEY,
@@ -1488,6 +1502,7 @@ class QueryDocForm(BaseModel):
     collection_name: str
     query: str
     k: Optional[int] = None
+    k_reranker: Optional[int] = None
     r: Optional[float] = None
     hybrid: Optional[bool] = None
 
@@ -1503,11 +1518,13 @@ def query_doc_handler(
             return query_doc_with_hybrid_search(
                 collection_name=form_data.collection_name,
                 query=form_data.query,
-                embedding_function=lambda query: request.app.state.EMBEDDING_FUNCTION(
-                    query, user=user
+                embedding_function=lambda query, prefix: request.app.state.EMBEDDING_FUNCTION(
+                    query, prefix=prefix, user=user
                 ),
                 k=form_data.k if form_data.k else request.app.state.config.TOP_K,
                 reranking_function=request.app.state.rf,
+                k_reranker=form_data.k_reranker
+                or request.app.state.config.TOP_K_RERANKER,
                 r=(
                     form_data.r
                     if form_data.r
@@ -1519,7 +1536,7 @@ def query_doc_handler(
             return query_doc(
                 collection_name=form_data.collection_name,
                 query_embedding=request.app.state.EMBEDDING_FUNCTION(
-                    form_data.query, user=user
+                    form_data.query, prefix=RAG_EMBEDDING_QUERY_PREFIX, user=user
                 ),
                 k=form_data.k if form_data.k else request.app.state.config.TOP_K,
                 user=user,
@@ -1536,6 +1553,7 @@ class QueryCollectionsForm(BaseModel):
     collection_names: list[str]
     query: str
     k: Optional[int] = None
+    k_reranker: Optional[int] = None
     r: Optional[float] = None
     hybrid: Optional[bool] = None
 
@@ -1551,11 +1569,13 @@ def query_collection_handler(
             return query_collection_with_hybrid_search(
                 collection_names=form_data.collection_names,
                 queries=[form_data.query],
-                embedding_function=lambda query: request.app.state.EMBEDDING_FUNCTION(
-                    query, user=user
+                embedding_function=lambda query, prefix: request.app.state.EMBEDDING_FUNCTION(
+                    query, prefix=prefix, user=user
                 ),
                 k=form_data.k if form_data.k else request.app.state.config.TOP_K,
                 reranking_function=request.app.state.rf,
+                k_reranker=form_data.k_reranker
+                or request.app.state.config.TOP_K_RERANKER,
                 r=(
                     form_data.r
                     if form_data.r
@@ -1566,8 +1586,8 @@ def query_collection_handler(
             return query_collection(
                 collection_names=form_data.collection_names,
                 queries=[form_data.query],
-                embedding_function=lambda query: request.app.state.EMBEDDING_FUNCTION(
-                    query, user=user
+                embedding_function=lambda query, prefix: request.app.state.EMBEDDING_FUNCTION(
+                    query, prefix=prefix, user=user
                 ),
                 k=form_data.k if form_data.k else request.app.state.config.TOP_K,
             )
@@ -1644,7 +1664,11 @@ if ENV == "dev":
 
     @router.get("/ef/{text}")
     async def get_embeddings(request: Request, text: Optional[str] = "Hello World!"):
+        """Return the embedding of ``text`` under the ``"result"`` key.
+
+        Only registered when ``ENV == "dev"``. The text is embedded with the
+        application's ``EMBEDDING_FUNCTION`` using the query prefix
+        (``RAG_EMBEDDING_QUERY_PREFIX``).
+        """
-        return {"result": request.app.state.EMBEDDING_FUNCTION(text)}
+        return {
+            "result": request.app.state.EMBEDDING_FUNCTION(
+                text, prefix=RAG_EMBEDDING_QUERY_PREFIX
+            )
+        }
 
 
 class BatchProcessFilesForm(BaseModel):

+ 25 - 5
backend/open_webui/routers/users.py

@@ -2,6 +2,7 @@ import logging
 from typing import Optional
 
 from open_webui.models.auths import Auths
+from open_webui.models.groups import Groups
 from open_webui.models.chats import Chats
 from open_webui.models.users import (
     UserModel,
@@ -17,7 +18,10 @@ from open_webui.constants import ERROR_MESSAGES
 from open_webui.env import SRC_LOG_LEVELS
 from fastapi import APIRouter, Depends, HTTPException, Request, status
 from pydantic import BaseModel
+
 from open_webui.utils.auth import get_admin_user, get_password_hash, get_verified_user
+from open_webui.utils.access_control import get_permissions
+
 
 log = logging.getLogger(__name__)
 log.setLevel(SRC_LOG_LEVELS["MODELS"])
@@ -45,7 +49,7 @@ async def get_users(
 
 @router.get("/groups")
 async def get_user_groups(user=Depends(get_verified_user)):
+    """Return the groups the calling (verified) user is a member of."""
-    return Users.get_user_groups(user.id)
+    return Groups.get_groups_by_member_id(user.id)
 
 
 ############################
@@ -54,8 +58,12 @@ async def get_user_groups(user=Depends(get_verified_user)):
 
 
 @router.get("/permissions")
-async def get_user_permissisions(user=Depends(get_verified_user)):
-    return Users.get_user_groups(user.id)
+async def get_user_permissisions(request: Request, user=Depends(get_verified_user)):
+    """Return the permissions computed for the calling (verified) user.
+
+    Delegates to ``get_permissions`` with the user's id and the
+    ``USER_PERMISSIONS`` value held in the application config.
+    """
+    user_permissions = get_permissions(
+        user.id, request.app.state.config.USER_PERMISSIONS
+    )
+
+    return user_permissions
 
 
 ############################
@@ -68,12 +76,20 @@ class WorkspacePermissions(BaseModel):
     tools: bool = False
 
 
+class SharingPermissions(BaseModel):
+    # "sharing" section of the user permissions; every flag defaults to True.
+    # Built from USER_PERMISSIONS["sharing"] by the default-permissions endpoint.
+    # NOTE(review): each flag presumably gates making that resource type
+    # public -- confirm against the access-control code.
+    public_models: bool = True
+    public_knowledge: bool = True
+    public_prompts: bool = True
+    public_tools: bool = True
+
+
 class ChatPermissions(BaseModel):
+    # "chat" section of the user permissions. All flags default to True
+    # except `temporary_enforced`, which defaults to False.
+    # NOTE(review): `temporary_enforced` presumably forces temporary chats
+    # for the user -- confirm against the consumer of this flag.
     controls: bool = True
     file_upload: bool = True
     delete: bool = True
     edit: bool = True
     temporary: bool = True
+    temporary_enforced: bool = False
 
 
 class FeaturesPermissions(BaseModel):
@@ -84,16 +100,20 @@ class FeaturesPermissions(BaseModel):
 
 class UserPermissions(BaseModel):
+    # Aggregate permissions document with one required sub-model per section.
+    # Used as the response model of GET /default/permissions and as the
+    # request body of POST /default/permissions.
     workspace: WorkspacePermissions
+    sharing: SharingPermissions
     chat: ChatPermissions
     features: FeaturesPermissions
 
 
 @router.get("/default/permissions", response_model=UserPermissions)
-async def get_user_permissions(request: Request, user=Depends(get_admin_user)):
+async def get_default_user_permissions(request: Request, user=Depends(get_admin_user)):
     return {
         "workspace": WorkspacePermissions(
             **request.app.state.config.USER_PERMISSIONS.get("workspace", {})
         ),
+        "sharing": SharingPermissions(
+            **request.app.state.config.USER_PERMISSIONS.get("sharing", {})
+        ),
         "chat": ChatPermissions(
             **request.app.state.config.USER_PERMISSIONS.get("chat", {})
         ),
@@ -104,7 +124,7 @@ async def get_user_permissions(request: Request, user=Depends(get_admin_user)):
 
 
 @router.post("/default/permissions")
-async def update_user_permissions(
+async def update_default_user_permissions(
     request: Request, form_data: UserPermissions, user=Depends(get_admin_user)
 ):
     request.app.state.config.USER_PERMISSIONS = form_data.model_dump()

+ 85 - 40
backend/open_webui/socket/main.py

@@ -3,16 +3,24 @@ import socketio
 import logging
 import sys
 import time
+from redis import asyncio as aioredis
 
 from open_webui.models.users import Users, UserNameResponse
 from open_webui.models.channels import Channels
 from open_webui.models.chats import Chats
+from open_webui.utils.redis import (
+    parse_redis_sentinel_url,
+    get_sentinels_from_env,
+    AsyncRedisSentinelManager,
+)
 
 from open_webui.env import (
     ENABLE_WEBSOCKET_SUPPORT,
     WEBSOCKET_MANAGER,
     WEBSOCKET_REDIS_URL,
     WEBSOCKET_REDIS_LOCK_TIMEOUT,
+    WEBSOCKET_SENTINEL_PORT,
+    WEBSOCKET_SENTINEL_HOSTS,
 )
 from open_webui.utils.auth import decode_token
 from open_webui.socket.utils import RedisDict, RedisLock
@@ -29,7 +37,19 @@ log.setLevel(SRC_LOG_LEVELS["SOCKET"])
 
 
 if WEBSOCKET_MANAGER == "redis":
-    mgr = socketio.AsyncRedisManager(WEBSOCKET_REDIS_URL)
+    if WEBSOCKET_SENTINEL_HOSTS:
+        redis_config = parse_redis_sentinel_url(WEBSOCKET_REDIS_URL)
+        mgr = AsyncRedisSentinelManager(
+            WEBSOCKET_SENTINEL_HOSTS.split(","),
+            sentinel_port=int(WEBSOCKET_SENTINEL_PORT),
+            redis_port=redis_config["port"],
+            service=redis_config["service"],
+            db=redis_config["db"],
+            username=redis_config["username"],
+            password=redis_config["password"],
+        )
+    else:
+        mgr = socketio.AsyncRedisManager(WEBSOCKET_REDIS_URL)
     sio = socketio.AsyncServer(
         cors_allowed_origins=[],
         async_mode="asgi",
@@ -55,14 +75,30 @@ TIMEOUT_DURATION = 3
 
 if WEBSOCKET_MANAGER == "redis":
     log.debug("Using Redis to manage websockets.")
-    SESSION_POOL = RedisDict("open-webui:session_pool", redis_url=WEBSOCKET_REDIS_URL)
-    USER_POOL = RedisDict("open-webui:user_pool", redis_url=WEBSOCKET_REDIS_URL)
-    USAGE_POOL = RedisDict("open-webui:usage_pool", redis_url=WEBSOCKET_REDIS_URL)
+    redis_sentinels = get_sentinels_from_env(
+        WEBSOCKET_SENTINEL_HOSTS, WEBSOCKET_SENTINEL_PORT
+    )
+    SESSION_POOL = RedisDict(
+        "open-webui:session_pool",
+        redis_url=WEBSOCKET_REDIS_URL,
+        redis_sentinels=redis_sentinels,
+    )
+    USER_POOL = RedisDict(
+        "open-webui:user_pool",
+        redis_url=WEBSOCKET_REDIS_URL,
+        redis_sentinels=redis_sentinels,
+    )
+    USAGE_POOL = RedisDict(
+        "open-webui:usage_pool",
+        redis_url=WEBSOCKET_REDIS_URL,
+        redis_sentinels=redis_sentinels,
+    )
 
     clean_up_lock = RedisLock(
         redis_url=WEBSOCKET_REDIS_URL,
         lock_name="usage_cleanup_lock",
         timeout_secs=WEBSOCKET_REDIS_LOCK_TIMEOUT,
+        redis_sentinels=redis_sentinels,
     )
     aquire_func = clean_up_lock.aquire_lock
     renew_func = clean_up_lock.renew_lock
@@ -269,11 +305,19 @@ async def disconnect(sid):
         # print(f"Unknown session ID {sid} disconnected")
 
 
-def get_event_emitter(request_info):
+def get_event_emitter(request_info, update_db=True):
     async def __event_emitter__(event_data):
         user_id = request_info["user_id"]
+
         session_ids = list(
-            set(USER_POOL.get(user_id, []) + [request_info["session_id"]])
+            set(
+                USER_POOL.get(user_id, [])
+                + (
+                    [request_info.get("session_id")]
+                    if request_info.get("session_id")
+                    else []
+                )
+            )
         )
 
         for session_id in session_ids:
@@ -287,40 +331,41 @@ def get_event_emitter(request_info):
                 to=session_id,
             )
 
-        if "type" in event_data and event_data["type"] == "status":
-            Chats.add_message_status_to_chat_by_id_and_message_id(
-                request_info["chat_id"],
-                request_info["message_id"],
-                event_data.get("data", {}),
-            )
-
-        if "type" in event_data and event_data["type"] == "message":
-            message = Chats.get_message_by_id_and_message_id(
-                request_info["chat_id"],
-                request_info["message_id"],
-            )
-
-            content = message.get("content", "")
-            content += event_data.get("data", {}).get("content", "")
-
-            Chats.upsert_message_to_chat_by_id_and_message_id(
-                request_info["chat_id"],
-                request_info["message_id"],
-                {
-                    "content": content,
-                },
-            )
-
-        if "type" in event_data and event_data["type"] == "replace":
-            content = event_data.get("data", {}).get("content", "")
-
-            Chats.upsert_message_to_chat_by_id_and_message_id(
-                request_info["chat_id"],
-                request_info["message_id"],
-                {
-                    "content": content,
-                },
-            )
+        if update_db:
+            if "type" in event_data and event_data["type"] == "status":
+                Chats.add_message_status_to_chat_by_id_and_message_id(
+                    request_info["chat_id"],
+                    request_info["message_id"],
+                    event_data.get("data", {}),
+                )
+
+            if "type" in event_data and event_data["type"] == "message":
+                message = Chats.get_message_by_id_and_message_id(
+                    request_info["chat_id"],
+                    request_info["message_id"],
+                )
+
+                content = message.get("content", "")
+                content += event_data.get("data", {}).get("content", "")
+
+                Chats.upsert_message_to_chat_by_id_and_message_id(
+                    request_info["chat_id"],
+                    request_info["message_id"],
+                    {
+                        "content": content,
+                    },
+                )
+
+            if "type" in event_data and event_data["type"] == "replace":
+                content = event_data.get("data", {}).get("content", "")
+
+                Chats.upsert_message_to_chat_by_id_and_message_id(
+                    request_info["chat_id"],
+                    request_info["message_id"],
+                    {
+                        "content": content,
+                    },
+                )
 
     return __event_emitter__
 

+ 9 - 5
backend/open_webui/socket/utils.py

@@ -1,15 +1,17 @@
 import json
-import redis
 import uuid
+from open_webui.utils.redis import get_redis_connection
 
 
 class RedisLock:
-    def __init__(self, redis_url, lock_name, timeout_secs):
+    def __init__(self, redis_url, lock_name, timeout_secs, redis_sentinels=None):
+        """Initialise lock bookkeeping and open the Redis connection.
+
+        Args:
+            redis_url: Redis URL passed to ``get_redis_connection``.
+            lock_name: Stored as ``self.lock_name``.
+            timeout_secs: Stored as ``self.timeout_secs``.
+            redis_sentinels: Optional sentinel list forwarded to
+                ``get_redis_connection``; ``None`` is treated as an empty list.
+        """
+        # Use a None sentinel instead of a shared mutable default list.
+        if redis_sentinels is None:
+            redis_sentinels = []
         self.lock_name = lock_name
         self.lock_id = str(uuid.uuid4())
         self.timeout_secs = timeout_secs
         self.lock_obtained = False
-        self.redis = redis.Redis.from_url(redis_url, decode_responses=True)
+        self.redis = get_redis_connection(
+            redis_url, redis_sentinels, decode_responses=True
+        )
 
     def aquire_lock(self):
         # nx=True will only set this key if it _hasn't_ already been set
@@ -31,9 +33,11 @@ class RedisLock:
 
 
 class RedisDict:
-    def __init__(self, name, redis_url):
+    def __init__(self, name, redis_url, redis_sentinels=None):
+        """Store the dictionary name and open the Redis connection.
+
+        Args:
+            name: Stored as ``self.name``.
+            redis_url: Redis URL passed to ``get_redis_connection``.
+            redis_sentinels: Optional sentinel list forwarded to
+                ``get_redis_connection``; ``None`` is treated as an empty list.
+        """
+        # Use a None sentinel instead of a shared mutable default list.
+        if redis_sentinels is None:
+            redis_sentinels = []
         self.name = name
-        self.redis = redis.Redis.from_url(redis_url, decode_responses=True)
+        self.redis = get_redis_connection(
+            redis_url, redis_sentinels, decode_responses=True
+        )
 
     def __setitem__(self, key, value):
         serialized_value = json.dumps(value)

+ 2 - 1
backend/open_webui/utils/filter.py

@@ -101,11 +101,12 @@ async def process_filter_functions(
                 form_data = handler(**params)
 
         except Exception as e:
-            log.exception(f"Error in {filter_type} handler {filter_id}: {e}")
+            log.debug(f"Error in {filter_type} handler {filter_id}: {e}")
             raise e
 
     # Handle file cleanup for inlet
     if skip_files and "files" in form_data.get("metadata", {}):
+        del form_data["files"]
         del form_data["metadata"]["files"]
 
     return form_data, {}

+ 182 - 64
backend/open_webui/utils/middleware.py

@@ -18,9 +18,7 @@ from uuid import uuid4
 from concurrent.futures import ThreadPoolExecutor
 
 
-from fastapi import Request
-from fastapi import BackgroundTasks
-
+from fastapi import Request, HTTPException
 from starlette.responses import Response, StreamingResponse
 
 
@@ -100,7 +98,7 @@ log.setLevel(SRC_LOG_LEVELS["MAIN"])
 
 
 async def chat_completion_tools_handler(
-    request: Request, body: dict, user: UserModel, models, tools
+    request: Request, body: dict, extra_params: dict, user: UserModel, models, tools
 ) -> tuple[dict, dict]:
     async def get_content_from_response(response) -> Optional[str]:
         content = None
@@ -135,6 +133,9 @@ async def chat_completion_tools_handler(
             "metadata": {"task": str(TASKS.FUNCTION_CALLING)},
         }
 
+    event_caller = extra_params["__event_call__"]
+    metadata = extra_params["__metadata__"]
+
     task_model_id = get_task_model_id(
         body["model"],
         request.app.state.config.TASK_MODEL,
@@ -156,7 +157,6 @@ async def chat_completion_tools_handler(
     tools_function_calling_prompt = tools_function_calling_generation_template(
         template, tools_specs
     )
-    log.info(f"{tools_function_calling_prompt=}")
     payload = get_tools_function_calling_payload(
         body["messages"], task_model_id, tools_function_calling_prompt
     )
@@ -189,34 +189,63 @@ async def chat_completion_tools_handler(
                 tool_function_params = tool_call.get("parameters", {})
 
                 try:
-                    required_params = (
-                        tools[tool_function_name]
-                        .get("spec", {})
-                        .get("parameters", {})
-                        .get("required", [])
+                    tool = tools[tool_function_name]
+
+                    spec = tool.get("spec", {})
+                    allowed_params = (
+                        spec.get("parameters", {}).get("properties", {}).keys()
                     )
-                    tool_function = tools[tool_function_name]["callable"]
                     tool_function_params = {
                         k: v
                         for k, v in tool_function_params.items()
-                        if k in required_params
+                        if k in allowed_params
                     }
-                    tool_output = await tool_function(**tool_function_params)
+
+                    if tool.get("direct", False):
+                        tool_result = await event_caller(
+                            {
+                                "type": "execute:tool",
+                                "data": {
+                                    "id": str(uuid4()),
+                                    "name": tool_function_name,
+                                    "params": tool_function_params,
+                                    "server": tool.get("server", {}),
+                                    "session_id": metadata.get("session_id", None),
+                                },
+                            }
+                        )
+                    else:
+                        tool_function = tool["callable"]
+                        tool_result = await tool_function(**tool_function_params)
 
                 except Exception as e:
-                    tool_output = str(e)
+                    tool_result = str(e)
+
+                if isinstance(tool_result, dict) or isinstance(tool_result, list):
+                    tool_result = json.dumps(tool_result, indent=2)
+
+                if isinstance(tool_result, str):
+                    tool = tools[tool_function_name]
+                    tool_id = tool.get("toolkit_id", "")
+                    if tool.get("citation", False) or tool.get("direct", False):
 
-                if isinstance(tool_output, str):
-                    if tools[tool_function_name]["citation"]:
                         sources.append(
                             {
                                 "source": {
-                                    "name": f"TOOL:{tools[tool_function_name]['toolkit_id']}/{tool_function_name}"
+                                    "name": (
+                                        f"TOOL:" + f"{tool_id}/{tool_function_name}"
+                                        if tool_id
+                                        else f"{tool_function_name}"
+                                    ),
                                 },
-                                "document": [tool_output],
+                                "document": [tool_result],
                                 "metadata": [
                                     {
-                                        "source": f"TOOL:{tools[tool_function_name]['toolkit_id']}/{tool_function_name}"
+                                        "source": (
+                                            f"TOOL:" + f"{tool_id}/{tool_function_name}"
+                                            if tool_id
+                                            else f"{tool_function_name}"
+                                        )
                                     }
                                 ],
                             }
@@ -225,16 +254,20 @@ async def chat_completion_tools_handler(
                         sources.append(
                             {
                                 "source": {},
-                                "document": [tool_output],
+                                "document": [tool_result],
                                 "metadata": [
                                     {
-                                        "source": f"TOOL:{tools[tool_function_name]['toolkit_id']}/{tool_function_name}"
+                                        "source": (
+                                            f"TOOL:" + f"{tool_id}/{tool_function_name}"
+                                            if tool_id
+                                            else f"{tool_function_name}"
+                                        )
                                     }
                                 ],
                             }
                         )
 
-                    if tools[tool_function_name]["file_handler"]:
+                    if tools[tool_function_name].get("file_handler", False):
                         skip_files = True
 
             # check if "tool_calls" in result
@@ -245,10 +278,10 @@ async def chat_completion_tools_handler(
                 await tool_call_handler(result)
 
         except Exception as e:
-            log.exception(f"Error: {e}")
+            log.debug(f"Error: {e}")
             content = None
     except Exception as e:
-        log.exception(f"Error: {e}")
+        log.debug(f"Error: {e}")
         content = None
 
     log.debug(f"tool_contexts: {sources}")
@@ -562,11 +595,12 @@ async def chat_completion_files_handler(
                         request=request,
                         files=files,
                         queries=queries,
-                        embedding_function=lambda query: request.app.state.EMBEDDING_FUNCTION(
-                            query, user=user
+                        embedding_function=lambda query, prefix: request.app.state.EMBEDDING_FUNCTION(
+                            query, prefix=prefix, user=user
                         ),
                         k=request.app.state.config.TOP_K,
                         reranking_function=request.app.state.rf,
+                        k_reranker=request.app.state.config.TOP_K_RERANKER,
                         r=request.app.state.config.RELEVANCE_THRESHOLD,
                         hybrid_search=request.app.state.config.ENABLE_RAG_HYBRID_SEARCH,
                         full_context=request.app.state.config.RAG_FULL_CONTEXT,
@@ -766,12 +800,18 @@ async def process_chat_payload(request, form_data, user, metadata, model):
     }
     form_data["metadata"] = metadata
 
+    # Server side tools
     tool_ids = metadata.get("tool_ids", None)
+    # Client side tools
+    tool_servers = metadata.get("tool_servers", None)
+
     log.debug(f"{tool_ids=}")
+    log.debug(f"{tool_servers=}")
+
+    tools_dict = {}
 
     if tool_ids:
-        # If tool_ids field is present, then get the tools
-        tools = get_tools(
+        tools_dict = get_tools(
             request,
             tool_ids,
             user,
@@ -782,20 +822,31 @@ async def process_chat_payload(request, form_data, user, metadata, model):
                 "__files__": metadata.get("files", []),
             },
         )
-        log.info(f"{tools=}")
 
+    if tool_servers:
+        for tool_server in tool_servers:
+            tool_specs = tool_server.pop("specs", [])
+
+            for tool in tool_specs:
+                tools_dict[tool["name"]] = {
+                    "spec": tool,
+                    "direct": True,
+                    "server": tool_server,
+                }
+
+    if tools_dict:
         if metadata.get("function_calling") == "native":
             # If the function calling is native, then call the tools function calling handler
-            metadata["tools"] = tools
+            metadata["tools"] = tools_dict
             form_data["tools"] = [
                 {"type": "function", "function": tool.get("spec", {})}
-                for tool in tools.values()
+                for tool in tools_dict.values()
             ]
         else:
             # If the function calling is not native, then call the tools function calling handler
             try:
                 form_data, flags = await chat_completion_tools_handler(
-                    request, form_data, user, models, tools
+                    request, form_data, extra_params, user, models, tools_dict
                 )
                 sources.extend(flags.get("sources", []))
 
@@ -814,7 +865,7 @@ async def process_chat_payload(request, form_data, user, metadata, model):
         for source_idx, source in enumerate(sources):
             if "document" in source:
                 for doc_idx, doc_context in enumerate(source["document"]):
-                    context_string += f"<source><source_id>{source_idx}</source_id><source_context>{doc_context}</source_context></source>\n"
+                    context_string += f"<source><source_id>{source_idx + 1}</source_id><source_context>{doc_context}</source_context></source>\n"
 
         context_string = context_string.strip()
         prompt = get_last_user_message(form_data["messages"])
@@ -991,6 +1042,16 @@ async def process_chat_response(
     # Non-streaming response
     if not isinstance(response, StreamingResponse):
         if event_emitter:
+            if "error" in response:
+                error = response["error"].get("detail", response["error"])
+                Chats.upsert_message_to_chat_by_id_and_message_id(
+                    metadata["chat_id"],
+                    metadata["message_id"],
+                    {
+                        "error": {"content": error},
+                    },
+                )
+
             if "selected_model_id" in response:
                 Chats.upsert_message_to_chat_by_id_and_message_id(
                     metadata["chat_id"],
@@ -1000,7 +1061,8 @@ async def process_chat_response(
                     },
                 )
 
-            if response.get("choices", [])[0].get("message", {}).get("content"):
+            choices = response.get("choices", [])
+            if choices and choices[0].get("message", {}).get("content"):
                 content = response["choices"][0]["message"]["content"]
 
                 if content:
@@ -1081,8 +1143,6 @@ async def process_chat_response(
         for filter_id in get_sorted_filter_ids(model)
     ]
 
-    print(f"{filter_functions=}")
-
     # Streaming response
     if event_emitter and event_caller:
         task_id = str(uuid4())  # Create a unique task ID.
@@ -1121,36 +1181,51 @@ async def process_chat_response(
                     elif block["type"] == "tool_calls":
                         attributes = block.get("attributes", {})
 
-                        block_content = block.get("content", [])
+                        tool_calls = block.get("content", [])
                         results = block.get("results", [])
 
                         if results:
 
-                            result_display_content = ""
+                            tool_calls_display_content = ""
+                            for tool_call in tool_calls:
 
-                            for result in results:
-                                tool_call_id = result.get("tool_call_id", "")
-                                tool_name = ""
+                                tool_call_id = tool_call.get("id", "")
+                                tool_name = tool_call.get("function", {}).get(
+                                    "name", ""
+                                )
+                                tool_arguments = tool_call.get("function", {}).get(
+                                    "arguments", ""
+                                )
 
-                                for tool_call in block_content:
-                                    if tool_call.get("id", "") == tool_call_id:
-                                        tool_name = tool_call.get("function", {}).get(
-                                            "name", ""
-                                        )
+                                tool_result = None
+                                for result in results:
+                                    if tool_call_id == result.get("tool_call_id", ""):
+                                        tool_result = result.get("content", None)
                                         break
 
-                                result_display_content = f"{result_display_content}\n> {tool_name}: {result.get('content', '')}"
+                                if tool_result:
+                                    tool_calls_display_content = f'{tool_calls_display_content}\n<details type="tool_calls" done="true" id="{tool_call_id}" name="{tool_name}" arguments="{html.escape(json.dumps(tool_arguments))}" result="{html.escape(json.dumps(tool_result))}">\n<summary>Tool Executed</summary>\n</details>'
+                                else:
+                                    tool_calls_display_content = f'{tool_calls_display_content}\n<details type="tool_calls" done="false" id="{tool_call_id}" name="{tool_name}" arguments="{html.escape(json.dumps(tool_arguments))}">\n<summary>Executing...</summary>\n</details>'
 
                             if not raw:
-                                content = f'{content}\n<details type="tool_calls" done="true" content="{html.escape(json.dumps(block_content))}" results="{html.escape(json.dumps(results))}">\n<summary>Tool Executed</summary>\n{result_display_content}\n</details>\n'
+                                content = f"{content}\n{tool_calls_display_content}\n\n"
                         else:
                             tool_calls_display_content = ""
 
-                            for tool_call in block_content:
-                                tool_calls_display_content = f"{tool_calls_display_content}\n> Executing {tool_call.get('function', {}).get('name', '')}"
+                            for tool_call in tool_calls:
+                                tool_call_id = tool_call.get("id", "")
+                                tool_name = tool_call.get("function", {}).get(
+                                    "name", ""
+                                )
+                                tool_arguments = tool_call.get("function", {}).get(
+                                    "arguments", ""
+                                )
+
+                                tool_calls_display_content = f'{tool_calls_display_content}\n<details type="tool_calls" done="false" id="{tool_call_id}" name="{tool_name}" arguments="{html.escape(json.dumps(tool_arguments))}">\n<summary>Executing...</summary>\n</details>'
 
                             if not raw:
-                                content = f'{content}\n<details type="tool_calls" done="false" content="{html.escape(json.dumps(block_content))}">\n<summary>Tool Executing...</summary>\n{tool_calls_display_content}\n</details>\n'
+                                content = f"{content}\n{tool_calls_display_content}\n\n"
 
                     elif block["type"] == "reasoning":
                         reasoning_display_content = "\n".join(
@@ -1507,6 +1582,16 @@ async def process_chat_response(
                                 else:
                                     choices = data.get("choices", [])
                                     if not choices:
+                                        error = data.get("error", {})
+                                        if error:
+                                            await event_emitter(
+                                                {
+                                                    "type": "chat:completion",
+                                                    "data": {
+                                                        "error": error,
+                                                    },
+                                                }
+                                            )
                                         usage = data.get("usage", {})
                                         if usage:
                                             await event_emitter(
@@ -1562,7 +1647,9 @@ async def process_chat_response(
 
                                     value = delta.get("content")
 
-                                    reasoning_content = delta.get("reasoning_content")
+                                    reasoning_content = delta.get(
+                                        "reasoning_content"
+                                    ) or delta.get("reasoning")
                                     if reasoning_content:
                                         if (
                                             not content_blocks
@@ -1757,6 +1844,15 @@ async def process_chat_response(
                             )
                         except Exception as e:
                             log.debug(e)
+                            # Fallback to JSON parsing
+                            try:
+                                tool_function_params = json.loads(
+                                    tool_call.get("function", {}).get("arguments", "{}")
+                                )
+                            except Exception as e:
+                                log.debug(
+                                    f"Error parsing tool call arguments: {tool_call.get('function', {}).get('arguments', '{}')}"
+                                )
 
                         tool_result = None
 
@@ -1765,21 +1861,48 @@ async def process_chat_response(
                             spec = tool.get("spec", {})
 
                             try:
-                                required_params = spec.get("parameters", {}).get(
-                                    "required", []
+                                allowed_params = (
+                                    spec.get("parameters", {})
+                                    .get("properties", {})
+                                    .keys()
                                 )
-                                tool_function = tool["callable"]
+
                                 tool_function_params = {
                                     k: v
                                     for k, v in tool_function_params.items()
-                                    if k in required_params
+                                    if k in allowed_params
                                 }
-                                tool_result = await tool_function(
-                                    **tool_function_params
-                                )
+
+                                if tool.get("direct", False):
+                                    tool_result = await event_caller(
+                                        {
+                                            "type": "execute:tool",
+                                            "data": {
+                                                "id": str(uuid4()),
+                                                "name": tool_name,
+                                                "params": tool_function_params,
+                                                "server": tool.get("server", {}),
+                                                "session_id": metadata.get(
+                                                    "session_id", None
+                                                ),
+                                            },
+                                        }
+                                    )
+
+                                else:
+                                    tool_function = tool["callable"]
+                                    tool_result = await tool_function(
+                                        **tool_function_params
+                                    )
+
                             except Exception as e:
                                 tool_result = str(e)
 
+                        if isinstance(tool_result, dict) or isinstance(
+                            tool_result, list
+                        ):
+                            tool_result = json.dumps(tool_result, indent=2)
+
                         results.append(
                             {
                                 "tool_call_id": tool_call_id,
@@ -1982,11 +2105,6 @@ async def process_chat_response(
                             }
                         )
 
-                        log.info(f"content_blocks={content_blocks}")
-                        log.info(
-                            f"serialize_content_blocks={serialize_content_blocks(content_blocks)}"
-                        )
-
                         try:
                             res = await generate_chat_completion(
                                 request,

+ 1 - 0
backend/open_webui/utils/models.py

@@ -49,6 +49,7 @@ async def get_all_base_models(request: Request, user: UserModel = None):
                 "created": int(time.time()),
                 "owned_by": "ollama",
                 "ollama": model,
+                "tags": model.get("tags", []),
             }
             for model in ollama_models["models"]
         ]

+ 8 - 5
backend/open_webui/utils/oauth.py

@@ -94,7 +94,7 @@ class OAuthManager:
             oauth_claim = auth_manager_config.OAUTH_ROLES_CLAIM
             oauth_allowed_roles = auth_manager_config.OAUTH_ALLOWED_ROLES
             oauth_admin_roles = auth_manager_config.OAUTH_ADMIN_ROLES
-            oauth_roles = None
+            oauth_roles = []
             # Default/fallback role if no matching roles are found
             role = auth_manager_config.DEFAULT_USER_ROLE
 
@@ -104,7 +104,7 @@ class OAuthManager:
                 nested_claims = oauth_claim.split(".")
                 for nested_claim in nested_claims:
                     claim_data = claim_data.get(nested_claim, {})
-                oauth_roles = claim_data if isinstance(claim_data, list) else None
+                oauth_roles = claim_data if isinstance(claim_data, list) else []
 
             log.debug(f"Oauth Roles claim: {oauth_claim}")
             log.debug(f"User roles from oauth: {oauth_roles}")
@@ -140,6 +140,7 @@ class OAuthManager:
         log.debug("Running OAUTH Group management")
         oauth_claim = auth_manager_config.OAUTH_GROUPS_CLAIM
 
+        user_oauth_groups = []
         # Nested claim search for groups claim
         if oauth_claim:
             claim_data = user_data
@@ -160,7 +161,7 @@ class OAuthManager:
 
         # Remove groups that user is no longer a part of
         for group_model in user_current_groups:
-            if group_model.name not in user_oauth_groups:
+            if user_oauth_groups and group_model.name not in user_oauth_groups:
                 # Remove group from user
                 log.debug(
                     f"Removing user from group {group_model.name} as it is no longer in their oauth groups"
@@ -186,8 +187,10 @@ class OAuthManager:
 
         # Add user to new groups
         for group_model in all_available_groups:
-            if group_model.name in user_oauth_groups and not any(
-                gm.name == group_model.name for gm in user_current_groups
+            if (
+                user_oauth_groups
+                and group_model.name in user_oauth_groups
+                and not any(gm.name == group_model.name for gm in user_current_groups)
             ):
                 # Add user to group
                 log.debug(

+ 24 - 0
backend/open_webui/utils/payload.py

@@ -63,6 +63,7 @@ def apply_model_params_to_body_openai(params: dict, form_data: dict) -> dict:
         "seed": lambda x: x,
         "stop": lambda x: [bytes(s, "utf-8").decode("unicode_escape") for s in x],
         "logit_bias": lambda x: x,
+        "response_format": dict,
     }
     return apply_model_params_to_body(params, form_data, mappings)
 
@@ -110,6 +111,15 @@ def apply_model_params_to_body_ollama(params: dict, form_data: dict) -> dict:
         "num_thread": int,
     }
 
+    # Extract keep_alive from options if it exists
+    if "options" in form_data and "keep_alive" in form_data["options"]:
+        form_data["keep_alive"] = form_data["options"]["keep_alive"]
+        del form_data["options"]["keep_alive"]
+
+    if "options" in form_data and "format" in form_data["options"]:
+        form_data["format"] = form_data["options"]["format"]
+        del form_data["options"]["format"]
+
     return apply_model_params_to_body(params, form_data, mappings)
 
 
@@ -231,6 +241,11 @@ def convert_payload_openai_to_ollama(openai_payload: dict) -> dict:
                 "system"
             ]  # To prevent Ollama warning of invalid option provided
 
+        # Extract keep_alive from options if it exists
+        if "keep_alive" in ollama_options:
+            ollama_payload["keep_alive"] = ollama_options["keep_alive"]
+            del ollama_options["keep_alive"]
+
     # If there is the "stop" parameter in the openai_payload, remap it to the ollama_payload.options
     if "stop" in openai_payload:
         ollama_options = ollama_payload.get("options", {})
@@ -240,4 +255,13 @@ def convert_payload_openai_to_ollama(openai_payload: dict) -> dict:
     if "metadata" in openai_payload:
         ollama_payload["metadata"] = openai_payload["metadata"]
 
+    if "response_format" in openai_payload:
+        response_format = openai_payload["response_format"]
+        format_type = response_format.get("type", None)
+
+        schema = response_format.get(format_type, None)
+        if schema:
+            format = schema.get("schema", None)
+            ollama_payload["format"] = format
+
     return ollama_payload

+ 10 - 6
backend/open_webui/utils/plugin.py

@@ -7,7 +7,7 @@ import types
 import tempfile
 import logging
 
-from open_webui.env import SRC_LOG_LEVELS
+from open_webui.env import SRC_LOG_LEVELS, PIP_OPTIONS, PIP_PACKAGE_INDEX_OPTIONS
 from open_webui.models.functions import Functions
 from open_webui.models.tools import Tools
 
@@ -165,15 +165,19 @@ def load_function_module_by_id(function_id, content=None):
         os.unlink(temp_file.name)
 
 
-def install_frontmatter_requirements(requirements):
+def install_frontmatter_requirements(requirements: str):
     if requirements:
         try:
             req_list = [req.strip() for req in requirements.split(",")]
-            for req in req_list:
-                log.info(f"Installing requirement: {req}")
-                subprocess.check_call([sys.executable, "-m", "pip", "install", req])
+            log.info(f"Installing requirements: {' '.join(req_list)}")
+            subprocess.check_call(
+                [sys.executable, "-m", "pip", "install"]
+                + PIP_OPTIONS
+                + req_list
+                + PIP_PACKAGE_INDEX_OPTIONS
+            )
         except Exception as e:
-            log.error(f"Error installing package: {req}")
+            log.error(f"Error installing packages: {' '.join(req_list)}")
             raise e
 
     else:

+ 109 - 0
backend/open_webui/utils/redis.py

@@ -0,0 +1,109 @@
+import socketio
+import redis
+from redis import asyncio as aioredis
+from urllib.parse import urlparse
+
+
+def parse_redis_sentinel_url(redis_url):
+    parsed_url = urlparse(redis_url)
+    if parsed_url.scheme != "redis":
+        raise ValueError("Invalid Redis URL scheme. Must be 'redis'.")
+
+    return {
+        "username": parsed_url.username or None,
+        "password": parsed_url.password or None,
+        "service": parsed_url.hostname or "mymaster",
+        "port": parsed_url.port or 6379,
+        "db": int(parsed_url.path.lstrip("/") or 0),
+    }
+
+
+def get_redis_connection(redis_url, redis_sentinels, decode_responses=True):
+    if redis_sentinels:
+        redis_config = parse_redis_sentinel_url(redis_url)
+        sentinel = redis.sentinel.Sentinel(
+            redis_sentinels,
+            port=redis_config["port"],
+            db=redis_config["db"],
+            username=redis_config["username"],
+            password=redis_config["password"],
+            decode_responses=decode_responses,
+        )
+
+        # Get a master connection from Sentinel
+        return sentinel.master_for(redis_config["service"])
+    else:
+        # Standard Redis connection
+        return redis.Redis.from_url(redis_url, decode_responses=decode_responses)
+
+
+def get_sentinels_from_env(sentinel_hosts_env, sentinel_port_env):
+    if sentinel_hosts_env:
+        sentinel_hosts = sentinel_hosts_env.split(",")
+        sentinel_port = int(sentinel_port_env)
+        return [(host, sentinel_port) for host in sentinel_hosts]
+    return []
+
+
+class AsyncRedisSentinelManager(socketio.AsyncRedisManager):
+    def __init__(
+        self,
+        sentinel_hosts,
+        sentinel_port=26379,
+        redis_port=6379,
+        service="mymaster",
+        db=0,
+        username=None,
+        password=None,
+        channel="socketio",
+        write_only=False,
+        logger=None,
+        redis_options=None,
+    ):
+        """
+        Initialize the Redis Sentinel Manager.
+        This implementation mostly replicates the __init__ of AsyncRedisManager and
+        overrides _redis_connect() with a version that uses Redis Sentinel
+
+        :param sentinel_hosts: List of Sentinel hosts
+        :param sentinel_port: Sentinel Port
+        :param redis_port: Redis Port (currently unsupported by aioredis!)
+        :param service: Master service name in Sentinel
+        :param db: Redis database to use
+        :param username: Redis username (if any) (currently unsupported by aioredis!)
+        :param password: Redis password (if any)
+        :param channel: The channel name on which the server sends and receives
+                        notifications. Must be the same in all the servers.
+        :param write_only: If set to ``True``, only initialize to emit events. The
+                           default of ``False`` initializes the class for emitting
+                           and receiving.
+        :param redis_options: additional keyword arguments to be passed to
+                              ``aioredis.from_url()``.
+        """
+        self._sentinels = [(host, sentinel_port) for host in sentinel_hosts]
+        self._redis_port = redis_port
+        self._service = service
+        self._db = db
+        self._username = username
+        self._password = password
+        self._channel = channel
+        self.redis_options = redis_options or {}
+
+        # connect and call grandparent constructor
+        self._redis_connect()
+        super(socketio.AsyncRedisManager, self).__init__(
+            channel=channel, write_only=write_only, logger=logger
+        )
+
+    def _redis_connect(self):
+        """Establish connections to Redis through Sentinel."""
+        sentinel = aioredis.sentinel.Sentinel(
+            self._sentinels,
+            port=self._redis_port,
+            db=self._db,
+            password=self._password,
+            **self.redis_options,
+        )
+
+        self.redis = sentinel.master_for(self._service)
+        self.pubsub = self.redis.pubsub(ignore_subscribe_messages=True)

+ 0 - 0
backend/open_webui/utils/telemetry/__init__.py


+ 26 - 0
backend/open_webui/utils/telemetry/constants.py

@@ -0,0 +1,26 @@
+from opentelemetry.semconv.trace import SpanAttributes as _SpanAttributes
+
+# Span Tags
+SPAN_DB_TYPE = "mysql"
+SPAN_REDIS_TYPE = "redis"
+SPAN_DURATION = "duration"
+SPAN_SQL_STR = "sql"
+SPAN_SQL_EXPLAIN = "explain"
+SPAN_ERROR_TYPE = "error"
+
+
+class SpanAttributes(_SpanAttributes):
+    """
+    Span Attributes
+    """
+
+    DB_INSTANCE = "db.instance"
+    DB_TYPE = "db.type"
+    DB_IP = "db.ip"
+    DB_PORT = "db.port"
+    ERROR_KIND = "error.kind"
+    ERROR_OBJECT = "error.object"
+    ERROR_MESSAGE = "error.message"
+    RESULT_CODE = "result.code"
+    RESULT_MESSAGE = "result.message"
+    RESULT_ERRORS = "result.errors"

+ 31 - 0
backend/open_webui/utils/telemetry/exporters.py

@@ -0,0 +1,31 @@
+import threading
+
+from opentelemetry.sdk.trace import ReadableSpan
+from opentelemetry.sdk.trace.export import BatchSpanProcessor
+
+
+class LazyBatchSpanProcessor(BatchSpanProcessor):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.done = True
+        with self.condition:
+            self.condition.notify_all()
+        self.worker_thread.join()
+        self.done = False
+        self.worker_thread = None
+
+    def on_end(self, span: ReadableSpan) -> None:
+        if self.worker_thread is None:
+            self.worker_thread = threading.Thread(
+                name=self.__class__.__name__, target=self.worker, daemon=True
+            )
+            self.worker_thread.start()
+        super().on_end(span)
+
+    def shutdown(self) -> None:
+        self.done = True
+        with self.condition:
+            self.condition.notify_all()
+        if self.worker_thread:
+            self.worker_thread.join()
+        self.span_exporter.shutdown()

+ 202 - 0
backend/open_webui/utils/telemetry/instrumentors.py

@@ -0,0 +1,202 @@
+import logging
+import traceback
+from typing import Collection, Union
+
+from aiohttp import (
+    TraceRequestStartParams,
+    TraceRequestEndParams,
+    TraceRequestExceptionParams,
+)
+from chromadb.telemetry.opentelemetry.fastapi import instrument_fastapi
+from fastapi import FastAPI
+from opentelemetry.instrumentation.httpx import (
+    HTTPXClientInstrumentor,
+    RequestInfo,
+    ResponseInfo,
+)
+from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
+from opentelemetry.instrumentation.logging import LoggingInstrumentor
+from opentelemetry.instrumentation.redis import RedisInstrumentor
+from opentelemetry.instrumentation.requests import RequestsInstrumentor
+from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor
+from opentelemetry.instrumentation.aiohttp_client import AioHttpClientInstrumentor
+from opentelemetry.trace import Span, StatusCode
+from redis import Redis
+from requests import PreparedRequest, Response
+from sqlalchemy import Engine
+from fastapi import status
+
+from open_webui.utils.telemetry.constants import SPAN_REDIS_TYPE, SpanAttributes
+
+from open_webui.env import SRC_LOG_LEVELS
+
+logger = logging.getLogger(__name__)
+logger.setLevel(SRC_LOG_LEVELS["MAIN"])
+
+
+def requests_hook(span: Span, request: PreparedRequest):
+    """
+    Http Request Hook
+    """
+
+    span.update_name(f"{request.method} {request.url}")
+    span.set_attributes(
+        attributes={
+            SpanAttributes.HTTP_URL: request.url,
+            SpanAttributes.HTTP_METHOD: request.method,
+        }
+    )
+
+
+def response_hook(span: Span, request: PreparedRequest, response: Response):
+    """
+    HTTP Response Hook
+    """
+
+    span.set_attributes(
+        attributes={
+            SpanAttributes.HTTP_STATUS_CODE: response.status_code,
+        }
+    )
+    span.set_status(StatusCode.ERROR if response.status_code >= 400 else StatusCode.OK)
+
+
+def redis_request_hook(span: Span, instance: Redis, args, kwargs):
+    """
+    Redis Request Hook
+    """
+
+    try:
+        connection_kwargs: dict = instance.connection_pool.connection_kwargs
+        host = connection_kwargs.get("host")
+        port = connection_kwargs.get("port")
+        db = connection_kwargs.get("db")
+        span.set_attributes(
+            {
+                SpanAttributes.DB_INSTANCE: f"{host}/{db}",
+                SpanAttributes.DB_NAME: f"{host}/{db}",
+                SpanAttributes.DB_TYPE: SPAN_REDIS_TYPE,
+                SpanAttributes.DB_PORT: port,
+                SpanAttributes.DB_IP: host,
+                SpanAttributes.DB_STATEMENT: " ".join([str(i) for i in args]),
+                SpanAttributes.DB_OPERATION: str(args[0]),
+            }
+        )
+    except Exception:  # pylint: disable=W0718
+        logger.error(traceback.format_exc())
+
+
+def httpx_request_hook(span: Span, request: RequestInfo):
+    """
+    HTTPX Request Hook
+    """
+
+    span.update_name(f"{request.method.decode()} {str(request.url)}")
+    span.set_attributes(
+        attributes={
+            SpanAttributes.HTTP_URL: str(request.url),
+            SpanAttributes.HTTP_METHOD: request.method.decode(),
+        }
+    )
+
+
+def httpx_response_hook(span: Span, request: RequestInfo, response: ResponseInfo):
+    """
+    HTTPX Response Hook
+    """
+
+    span.set_attribute(SpanAttributes.HTTP_STATUS_CODE, response.status_code)
+    span.set_status(
+        StatusCode.ERROR
+        if response.status_code >= status.HTTP_400_BAD_REQUEST
+        else StatusCode.OK
+    )
+
+
+async def httpx_async_request_hook(span: Span, request: RequestInfo):
+    """
+    Async Request Hook
+    """
+
+    httpx_request_hook(span, request)
+
+
+async def httpx_async_response_hook(
+    span: Span, request: RequestInfo, response: ResponseInfo
+):
+    """
+    Async Response Hook
+    """
+
+    httpx_response_hook(span, request, response)
+
+
+def aiohttp_request_hook(span: Span, request: TraceRequestStartParams):
+    """
+    Aiohttp Request Hook
+    """
+
+    span.update_name(f"{request.method} {str(request.url)}")
+    span.set_attributes(
+        attributes={
+            SpanAttributes.HTTP_URL: str(request.url),
+            SpanAttributes.HTTP_METHOD: request.method,
+        }
+    )
+
+
+def aiohttp_response_hook(
+    span: Span, response: Union[TraceRequestExceptionParams, TraceRequestEndParams]
+):
+    """
+    Aiohttp Response Hook
+    """
+
+    if isinstance(response, TraceRequestEndParams):
+        span.set_attribute(SpanAttributes.HTTP_STATUS_CODE, response.response.status)
+        span.set_status(
+            StatusCode.ERROR
+            if response.response.status >= status.HTTP_400_BAD_REQUEST
+            else StatusCode.OK
+        )
+    elif isinstance(response, TraceRequestExceptionParams):
+        span.set_status(StatusCode.ERROR)
+        span.set_attribute(SpanAttributes.ERROR_MESSAGE, str(response.exception))
+
+
+class Instrumentor(BaseInstrumentor):
+    """
+    Instrument OT
+    """
+
+    def __init__(self, app: FastAPI, db_engine: Engine):
+        self.app = app
+        self.db_engine = db_engine
+
+    def instrumentation_dependencies(self) -> Collection[str]:
+        return []
+
+    def _instrument(self, **kwargs):
+        instrument_fastapi(app=self.app)
+        SQLAlchemyInstrumentor().instrument(engine=self.db_engine)
+        RedisInstrumentor().instrument(request_hook=redis_request_hook)
+        RequestsInstrumentor().instrument(
+            request_hook=requests_hook, response_hook=response_hook
+        )
+        LoggingInstrumentor().instrument()
+        HTTPXClientInstrumentor().instrument(
+            request_hook=httpx_request_hook,
+            response_hook=httpx_response_hook,
+            async_request_hook=httpx_async_request_hook,
+            async_response_hook=httpx_async_response_hook,
+        )
+        AioHttpClientInstrumentor().instrument(
+            request_hook=aiohttp_request_hook,
+            response_hook=aiohttp_response_hook,
+        )
+
+    def _uninstrument(self, **kwargs):
+        if getattr(self, "instrumentors", None) is None:
+            return
+        for instrumentor in self.instrumentors:
+            instrumentor.uninstrument()

+ 23 - 0
backend/open_webui/utils/telemetry/setup.py

@@ -0,0 +1,23 @@
+from fastapi import FastAPI
+from opentelemetry import trace
+from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
+from opentelemetry.sdk.resources import SERVICE_NAME, Resource
+from opentelemetry.sdk.trace import TracerProvider
+from sqlalchemy import Engine
+
+from open_webui.utils.telemetry.exporters import LazyBatchSpanProcessor
+from open_webui.utils.telemetry.instrumentors import Instrumentor
+from open_webui.env import OTEL_SERVICE_NAME, OTEL_EXPORTER_OTLP_ENDPOINT
+
+
+def setup(app: FastAPI, db_engine: Engine):
+    # set up trace
+    trace.set_tracer_provider(
+        TracerProvider(
+            resource=Resource.create(attributes={SERVICE_NAME: OTEL_SERVICE_NAME})
+        )
+    )
+    # otlp export
+    exporter = OTLPSpanExporter(endpoint=OTEL_EXPORTER_OTLP_ENDPOINT)
+    trace.get_tracer_provider().add_span_processor(LazyBatchSpanProcessor(exporter))
+    Instrumentor(app=app, db_engine=db_engine).instrument()

+ 6 - 2
backend/open_webui/utils/tools.py

@@ -1,6 +1,9 @@
 import inspect
 import logging
 import re
+import inspect
+import uuid
+
 from typing import Any, Awaitable, Callable, get_type_hints
 from functools import update_wrapper, partial
 
@@ -88,10 +91,11 @@ def get_tools(
 
             # TODO: This needs to be a pydantic model
             tool_dict = {
-                "toolkit_id": tool_id,
-                "callable": callable,
                 "spec": spec,
+                "callable": callable,
+                "toolkit_id": tool_id,
                 "pydantic_model": function_to_pydantic_model(callable),
+                # Misc info
                 "file_handler": hasattr(module, "file_handler") and module.file_handler,
                 "citation": hasattr(module, "citation") and module.citation,
             }

+ 16 - 2
backend/requirements.txt

@@ -37,13 +37,13 @@ asgiref==3.8.1
 # AI libraries
 openai
 anthropic
-google-generativeai==0.7.2
+google-generativeai==0.8.4
 tiktoken
 
 langchain==0.3.19
 langchain-community==0.3.18
 
-fake-useragent==1.5.1
+fake-useragent==2.1.0
 chromadb==0.6.2
 pymilvus==2.5.0
 qdrant-client~=1.12.0
@@ -78,6 +78,7 @@ sentencepiece
 soundfile==0.13.1
 azure-ai-documentintelligence==1.0.0
 
+pillow==11.1.0
 opencv-python-headless==4.11.0.86
 rapidocr-onnxruntime==1.3.24
 rank-bm25==0.2.2
@@ -118,3 +119,16 @@ ldap3==2.9.1
 
 ## Firecrawl
 firecrawl-py==1.12.0
+
+## Trace
+opentelemetry-api==1.30.0
+opentelemetry-sdk==1.30.0
+opentelemetry-exporter-otlp==1.30.0
+opentelemetry-instrumentation==0.51b0
+opentelemetry-instrumentation-fastapi==0.51b0
+opentelemetry-instrumentation-sqlalchemy==0.51b0
+opentelemetry-instrumentation-redis==0.51b0
+opentelemetry-instrumentation-requests==0.51b0
+opentelemetry-instrumentation-logging==0.51b0
+opentelemetry-instrumentation-httpx==0.51b0
+opentelemetry-instrumentation-aiohttp-client==0.51b0

+ 2 - 1
backend/start_windows.bat

@@ -41,4 +41,5 @@ IF "%WEBUI_SECRET_KEY%%WEBUI_JWT_SECRET_KEY%" == " " (
 
 :: Execute uvicorn
 SET "WEBUI_SECRET_KEY=%WEBUI_SECRET_KEY%"
-uvicorn open_webui.main:app --host "%HOST%" --port "%PORT%" --forwarded-allow-ips '*'
+uvicorn open_webui.main:app --host "%HOST%" --port "%PORT%" --forwarded-allow-ips '*' --ws auto
+:: For ssl user uvicorn open_webui.main:app --host "%HOST%" --port "%PORT%" --forwarded-allow-ips '*' --ssl-keyfile "key.pem" --ssl-certfile "cert.pem" --ws auto

File diff suppressed because it is too large
+ 305 - 189
package-lock.json


+ 4 - 2
package.json

@@ -1,6 +1,6 @@
 {
 	"name": "open-webui",
-	"version": "0.5.20",
+	"version": "0.6.0",
 	"private": true,
 	"scripts": {
 		"dev": "npm run pyodide:fetch && vite dev --host",
@@ -80,6 +80,8 @@
 		"file-saver": "^2.0.5",
 		"fuse.js": "^7.0.0",
 		"highlight.js": "^11.9.0",
+		"html-entities": "^2.5.3",
+		"html2canvas-pro": "^1.5.8",
 		"i18next": "^23.10.0",
 		"i18next-browser-languagedetector": "^7.2.0",
 		"i18next-resources-to-backend": "^1.2.0",
@@ -102,7 +104,7 @@
 		"prosemirror-schema-list": "^1.4.1",
 		"prosemirror-state": "^1.4.3",
 		"prosemirror-view": "^1.34.3",
-		"pyodide": "^0.27.2",
+		"pyodide": "^0.27.3",
 		"socket.io-client": "^4.2.0",
 		"sortablejs": "^1.15.2",
 		"svelte-sonner": "^0.3.19",

+ 2 - 1
pyproject.toml

@@ -51,7 +51,7 @@ dependencies = [
     "langchain==0.3.19",
     "langchain-community==0.3.18",
 
-    "fake-useragent==1.5.1",
+    "fake-useragent==2.1.0",
     "chromadb==0.6.2",
     "pymilvus==2.5.0",
     "qdrant-client~=1.12.0",
@@ -84,6 +84,7 @@ dependencies = [
     "soundfile==0.13.1",
     "azure-ai-documentintelligence==1.0.0",
 
+    "pillow==11.1.0",
     "opencv-python-headless==4.11.0.86",
     "rapidocr-onnxruntime==1.3.24",
     "rank-bm25==0.2.2",

+ 1 - 1
src/app.css

@@ -106,7 +106,7 @@ li p {
 }
 
 ::-webkit-scrollbar {
-	height: 0.4rem;
+	height: 0.8rem;
 	width: 0.4rem;
 }
 

+ 183 - 0
src/lib/apis/index.ts

@@ -1,6 +1,9 @@
 import { WEBUI_API_BASE_URL, WEBUI_BASE_URL } from '$lib/constants';
+import { convertOpenApiToToolPayload } from '$lib/utils';
 import { getOpenAIModelsDirect } from './openai';
 
+import { toast } from 'svelte-sonner';
+
 export const getModels = async (
 	token: string = '',
 	connections: object | null = null,
@@ -114,6 +117,13 @@ export const getModels = async (
 					}
 				}
 
+				const tags = apiConfig.tags;
+				if (tags) {
+					for (const model of models) {
+						model.tags = tags;
+					}
+				}
+
 				localModels = localModels.concat(models);
 			}
 		}
@@ -249,6 +259,179 @@ export const stopTask = async (token: string, id: string) => {
 	return res;
 };
 
+/**
+ * Fetch a tool server's OpenAPI document and derive its tool specs.
+ *
+ * Requests `${url}/openapi.json`, sending `token` as a Bearer credential when
+ * it is truthy.
+ *
+ * @param token - API key for the tool server; the authorization header is omitted when falsy.
+ * @param url - Base URL of the tool server (`/openapi.json` is appended as-is).
+ * @returns `{ openapi, info, specs }`: the parsed document, its `info` section,
+ *          and the result of `convertOpenApiToToolPayload` for that document.
+ * @throws The `detail` field of the error payload when present, otherwise the
+ *         raw error/payload; a generic Error when nothing usable was received.
+ */
+export const getToolServerData = async (token: string, url: string) => {
+	let error = null;
+	let failed = false;
+
+	const res = await fetch(`${url}/openapi.json`, {
+		method: 'GET',
+		headers: {
+			Accept: 'application/json',
+			'Content-Type': 'application/json',
+			...(token && { authorization: `Bearer ${token}` })
+		}
+	})
+		.then(async (res) => {
+			if (!res.ok) throw await res.json();
+			return res.json();
+		})
+		.catch((err) => {
+			console.log(err);
+			failed = true;
+			// `err` may be an Error, a parsed JSON object, or a JSON primitive/null,
+			// so optional chaining is used instead of `in` (which throws on non-objects).
+			error = err?.detail ?? err;
+			return null;
+		});
+
+	// Track failure separately from `error`: a falsy payload (e.g. `detail: ""`)
+	// or an empty document must still be reported instead of crashing on `res.info`.
+	if (failed || !res) {
+		throw error || new Error(`Failed to fetch OpenAPI spec from ${url}/openapi.json`);
+	}
+
+	return {
+		openapi: res,
+		info: res.info,
+		specs: convertOpenApiToToolPayload(res)
+	};
+};
+
+/**
+ * Load OpenAPI data for every enabled tool server, in parallel.
+ *
+ * Servers whose `config.enable` is falsy are skipped. A server that fails to
+ * load triggers an error toast and is left out of the result.
+ *
+ * @param i18n - Translator object whose `t` method renders the toast message.
+ * @param servers - Server entries; `url`, `key` and `config.enable` are read from each.
+ * @returns One `{ url, openapi, info, specs }` object per server that loaded successfully.
+ */
+export const getToolServersData = async (i18n, servers: object[]) => {
+	const enabledServers = servers.filter((server) => server?.config?.enable);
+
+	const loadServer = async (server) => {
+		let data = null;
+
+		try {
+			data = await getToolServerData(server?.key, server?.url);
+		} catch (err) {
+			toast.error(
+				i18n.t(`Failed to connect to {{URL}} OpenAPI tool server`, {
+					URL: server?.url
+				})
+			);
+		}
+
+		// Failed servers yield `undefined`, which is filtered out below
+		if (!data) {
+			return undefined;
+		}
+
+		return {
+			url: server?.url,
+			openapi: data.openapi,
+			info: data.info,
+			specs: data.specs
+		};
+	};
+
+	const results = await Promise.all(enabledServers.map(loadServer));
+	return results.filter((entry) => entry);
+};
+
+/**
+ * Invoke a single operation of an OpenAPI tool server.
+ *
+ * The operation is located by `operationId` in `serverData.openapi.paths`.
+ * Entries of `params` that the operation declares as `path` or `query`
+ * parameters are placed in the URL; when the operation declares a
+ * `requestBody` and the method is POST/PUT/PATCH, the whole `params` object is
+ * sent as the JSON body.
+ *
+ * @param token - Bearer token; the authorization header is omitted when falsy.
+ * @param url - Base URL of the tool server; the matched route path is appended.
+ * @param name - `operationId` of the operation to call.
+ * @param params - Arguments for the call, keyed by parameter name.
+ * @param serverData - Data previously loaded for the server; only `openapi.paths` is read.
+ * @returns The parsed JSON response, or `{ error }` with a message when the
+ *          lookup, the request or the response parsing fails (never throws).
+ */
+export const executeToolServer = async (
+	token: string,
+	url: string,
+	name: string,
+	params: Record<string, any>,
+	serverData: { openapi: any; info: any; specs: any }
+) => {
+	try {
+		// Find the first path/method pair whose operationId matches `name`
+		let route: { path: string; method: string; operation: any } | undefined;
+
+		for (const [path, methods] of Object.entries(serverData.openapi.paths)) {
+			const match = Object.entries(methods as any).find(
+				([_, candidate]: any) => candidate?.operationId === name
+			);
+
+			if (match) {
+				route = { path, method: match[0], operation: match[1] };
+				break;
+			}
+		}
+
+		if (!route) {
+			throw new Error(`No matching route found for operationId: ${name}`);
+		}
+
+		const { path: routePath, method: httpMethod, operation } = route;
+
+		// Split parameters by type
+		const pathParams: Record<string, any> = {};
+		const queryParams: Record<string, any> = {};
+		let bodyParams: any = {};
+
+		// Use the prototype method so a `hasOwnProperty` key inside `params`
+		// (or a prototype-less object) cannot break the lookup.
+		const hasParam = (key: string) => Object.prototype.hasOwnProperty.call(params ?? {}, key);
+
+		for (const param of operation.parameters ?? []) {
+			const paramName = param.name;
+			if (!hasParam(paramName)) {
+				continue;
+			}
+
+			if (param.in === 'path') {
+				pathParams[paramName] = params[paramName];
+			} else if (param.in === 'query') {
+				queryParams[paramName] = params[paramName];
+			}
+		}
+
+		let finalUrl = `${url}${routePath}`;
+
+		// Replace path parameters (`{param}`). Plain split/join is used so that
+		// parameter names containing regex metacharacters are matched literally.
+		Object.entries(pathParams).forEach(([key, value]) => {
+			finalUrl = finalUrl.split(`{${key}}`).join(encodeURIComponent(value));
+		});
+
+		// Append query parameters to URL if any
+		if (Object.keys(queryParams).length > 0) {
+			const queryString = new URLSearchParams(
+				Object.entries(queryParams).map(([k, v]) => [k, String(v)])
+			).toString();
+			finalUrl += `?${queryString}`;
+		}
+
+		// Handle requestBody composite: the complete `params` object is the body
+		if (operation.requestBody && operation.requestBody.content) {
+			if (params !== undefined) {
+				bodyParams = params;
+			} else {
+				throw new Error(`Request body expected for operation '${name}' but none found.`);
+			}
+		}
+
+		// Prepare headers and request options
+		const headers: Record<string, string> = {
+			'Content-Type': 'application/json',
+			...(token && { authorization: `Bearer ${token}` })
+		};
+
+		const requestOptions: RequestInit = {
+			method: httpMethod.toUpperCase(),
+			headers
+		};
+
+		if (['post', 'put', 'patch'].includes(httpMethod.toLowerCase()) && operation.requestBody) {
+			requestOptions.body = JSON.stringify(bodyParams);
+		}
+
+		const res = await fetch(finalUrl, requestOptions);
+		if (!res.ok) {
+			const resText = await res.text();
+			throw new Error(`HTTP error! Status: ${res.status}. Message: ${resText}`);
+		}
+
+		return await res.json();
+	} catch (err: any) {
+		// Non-Error throws have no `message`; stringify them so `error` is never undefined
+		const error = err?.message ?? String(err);
+		console.error('API Request Error:', error);
+		return { error };
+	}
+};
+
 export const getTaskConfig = async (token: string = '') => {
 	let error = null;
 

+ 39 - 8
src/lib/components/AddConnectionModal.svelte

@@ -14,6 +14,7 @@
 	import SensitiveInput from '$lib/components/common/SensitiveInput.svelte';
 	import Tooltip from '$lib/components/common/Tooltip.svelte';
 	import Switch from '$lib/components/common/Switch.svelte';
+	import Tags from './common/Tags.svelte';
 
 	export let onSubmit: Function = () => {};
 	export let onDelete: Function = () => {};
@@ -31,6 +32,7 @@
 
 	let prefixId = '';
 	let enable = true;
+	let tags = [];
 
 	let modelId = '';
 	let modelIds = [];
@@ -77,17 +79,21 @@
 	const submitHandler = async () => {
 		loading = true;
 
-		if (!ollama && (!url || !key)) {
+		if (!ollama && !url) {
 			loading = false;
-			toast.error('URL and Key are required');
+			toast.error('URL is required');
 			return;
 		}
 
+		// remove trailing slash from url
+		url = url.replace(/\/$/, '');
+
 		const connection = {
 			url,
 			key,
 			config: {
 				enable: enable,
+				tags: tags,
 				prefix_id: prefixId,
 				model_ids: modelIds
 			}
@@ -101,6 +107,7 @@
 		url = '';
 		key = '';
 		prefixId = '';
+		tags = [];
 		modelIds = [];
 	};
 
@@ -110,6 +117,7 @@
 			key = connection.key;
 
 			enable = connection.config?.enable ?? true;
+			tags = connection.config?.tags ?? [];
 			prefixId = connection.config?.prefix_id ?? '';
 			modelIds = connection.config?.model_ids ?? [];
 		}
@@ -179,7 +187,7 @@
 								</div>
 							</div>
 
-							<Tooltip content="Verify Connection" className="self-end -mb-1">
+							<Tooltip content={$i18n.t('Verify Connection')} className="self-end -mb-1">
 								<button
 									class="self-center p-1 bg-transparent hover:bg-gray-100 dark:bg-gray-900 dark:hover:bg-gray-850 rounded-lg transition"
 									on:click={() => {
@@ -218,7 +226,7 @@
 										className="w-full text-sm bg-transparent placeholder:text-gray-300 dark:placeholder:text-gray-700 outline-hidden"
 										bind:value={key}
 										placeholder={$i18n.t('API Key')}
-										required={!ollama}
+										required={false}
 									/>
 								</div>
 							</div>
@@ -244,6 +252,29 @@
 							</div>
 						</div>
 
+						<div class="flex gap-2 mt-2">
+							<div class="flex flex-col w-full">
+								<div class=" mb-1.5 text-xs text-gray-500">{$i18n.t('Tags')}</div>
+
+								<div class="flex-1">
+									<Tags
+										bind:tags
+										on:add={(e) => {
+											tags = [
+												...tags,
+												{
+													name: e.detail
+												}
+											];
+										}}
+										on:delete={(e) => {
+											tags = tags.filter((tag) => tag.name !== e.detail);
+										}}
+									/>
+								</div>
+							</div>
+						</div>
+
 						<hr class=" border-gray-100 dark:border-gray-700/10 my-2.5 w-full" />
 
 						<div class="flex flex-col w-full">
@@ -274,12 +305,12 @@
 							{:else}
 								<div class="text-gray-500 text-xs text-center py-2 px-10">
 									{#if ollama}
-										{$i18n.t('Leave empty to include all models from "{{URL}}/api/tags" endpoint', {
-											URL: url
+										{$i18n.t('Leave empty to include all models from "{{url}}/api/tags" endpoint', {
+											url: url
 										})}
 									{:else}
-										{$i18n.t('Leave empty to include all models from "{{URL}}/models" endpoint', {
-											URL: url
+										{$i18n.t('Leave empty to include all models from "{{url}}/models" endpoint', {
+											url: url
 										})}
 									{/if}
 								</div>

+ 215 - 0
src/lib/components/AddServerModal.svelte

@@ -0,0 +1,215 @@
+<script lang="ts">
+	import { toast } from 'svelte-sonner';
+	import { getContext, onMount } from 'svelte';
+	const i18n = getContext('i18n');
+
+	import { models } from '$lib/stores';
+	import { verifyOpenAIConnection } from '$lib/apis/openai';
+	import { verifyOllamaConnection } from '$lib/apis/ollama';
+
+	import Modal from '$lib/components/common/Modal.svelte';
+	import Plus from '$lib/components/icons/Plus.svelte';
+	import Minus from '$lib/components/icons/Minus.svelte';
+	import PencilSolid from '$lib/components/icons/PencilSolid.svelte';
+	import SensitiveInput from '$lib/components/common/SensitiveInput.svelte';
+	import Tooltip from '$lib/components/common/Tooltip.svelte';
+	import Switch from '$lib/components/common/Switch.svelte';
+	import Tags from './common/Tags.svelte';
+
+	// Called by the form with the assembled `{ url, key, config }` object
+	export let onSubmit: Function = () => {};
+	// Called when the Delete button (rendered only in edit mode) is clicked
+	export let onDelete: Function = () => {};
+
+	// Modal visibility; bound to the inner <Modal> via `bind:show`
+	export let show = false;
+	// When true the header uses the 'Edit Connection' label and a Delete button is shown
+	export let edit = false;
+
+	// Optional existing connection used to pre-fill the form fields
+	export let connection = null;
+
+	// Form field state
+	let url = '';
+	let key = '';
+	let enable = true;
+
+	// Set while a submit is in progress; disables the Save button and shows the spinner
+	let loading = false;
+
+	// Build the connection object from the form, hand it to `onSubmit`, then
+	// close the modal and reset the fields. Errors from `onSubmit` propagate to
+	// the caller, but the loading state is always cleared.
+	const submitHandler = async () => {
+		loading = true;
+
+		try {
+			// remove trailing slash from url
+			url = url.replace(/\/$/, '');
+
+			// Named `payload` so it does not shadow the exported `connection` prop
+			const payload = {
+				url,
+				key,
+				config: {
+					enable: enable
+				}
+			};
+
+			await onSubmit(payload);
+
+			show = false;
+
+			url = '';
+			key = '';
+			enable = true;
+		} finally {
+			// Re-enable the Save button even when `onSubmit` throws
+			loading = false;
+		}
+	};
+
+	// Copy the `connection` prop into the form fields; no-op when it is not set.
+	const init = () => {
+		if (!connection) {
+			return;
+		}
+
+		// A missing or nullish `enable` flag is treated as enabled
+		enable = connection.config?.enable ?? true;
+		url = connection.url;
+		key = connection.key;
+	};
+
+	// Reactive statement: whenever it is re-evaluated with `show` truthy,
+	// refresh the form fields from the `connection` prop.
+	$: if (show) {
+		init();
+	}
+
+	// Also populate the fields once when the component mounts
+	onMount(() => {
+		init();
+	});
+</script>
+
+<Modal size="sm" bind:show>
+	<div>
+		<div class=" flex justify-between dark:text-gray-100 px-5 pt-4 pb-2">
+			<div class=" text-lg font-medium self-center font-primary">
+				{#if edit}
+					{$i18n.t('Edit Connection')}
+				{:else}
+					{$i18n.t('Add Connection')}
+				{/if}
+			</div>
+			<button
+				class="self-center"
+				on:click={() => {
+					show = false;
+				}}
+			>
+				<svg
+					xmlns="http://www.w3.org/2000/svg"
+					viewBox="0 0 20 20"
+					fill="currentColor"
+					class="w-5 h-5"
+				>
+					<path
+						d="M6.28 5.22a.75.75 0 00-1.06 1.06L8.94 10l-3.72 3.72a.75.75 0 101.06 1.06L10 11.06l3.72 3.72a.75.75 0 101.06-1.06L11.06 10l3.72-3.72a.75.75 0 00-1.06-1.06L10 8.94 6.28 5.22z"
+					/>
+				</svg>
+			</button>
+		</div>
+
+		<div class="flex flex-col md:flex-row w-full px-4 pb-4 md:space-x-4 dark:text-gray-200">
+			<div class=" flex flex-col w-full sm:flex-row sm:justify-center sm:space-x-6">
+				<form
+					class="flex flex-col w-full"
+					on:submit={(e) => {
+						e.preventDefault();
+						submitHandler();
+					}}
+				>
+					<div class="px-1">
+						<div class="flex gap-2">
+							<div class="flex flex-col w-full">
+								<div class=" mb-0.5 text-xs text-gray-500">{$i18n.t('URL')}</div>
+
+								<div class="flex-1">
+									<input
+										class="w-full text-sm bg-transparent placeholder:text-gray-300 dark:placeholder:text-gray-700 outline-hidden"
+										type="text"
+										bind:value={url}
+										placeholder={$i18n.t('API Base URL')}
+										autocomplete="off"
+										required
+									/>
+								</div>
+							</div>
+
+							<div class="flex flex-col shrink-0 self-end">
+								<Tooltip content={enable ? $i18n.t('Enabled') : $i18n.t('Disabled')}>
+									<Switch bind:state={enable} />
+								</Tooltip>
+							</div>
+						</div>
+
+						<div class="text-xs text-gray-500 mt-1">
+							{$i18n.t(`WebUI will make requests to "{{url}}/openapi.json"`, {
+								url: url
+							})}
+						</div>
+
+						<div class="flex gap-2 mt-2">
+							<div class="flex flex-col w-full">
+								<div class=" mb-0.5 text-xs text-gray-500">{$i18n.t('Key')}</div>
+
+								<div class="flex-1">
+									<SensitiveInput
+										className="w-full text-sm bg-transparent placeholder:text-gray-300 dark:placeholder:text-gray-700 outline-hidden"
+										bind:value={key}
+										placeholder={$i18n.t('API Key')}
+										required={false}
+									/>
+								</div>
+							</div>
+						</div>
+					</div>
+
+					<div class="flex justify-end pt-3 text-sm font-medium gap-1.5">
+						{#if edit}
+							<button
+								class="px-3.5 py-1.5 text-sm font-medium dark:bg-black dark:hover:bg-gray-900 dark:text-white bg-white text-black hover:bg-gray-100 transition rounded-full flex flex-row space-x-1 items-center"
+								type="button"
+								on:click={() => {
+									onDelete();
+									show = false;
+								}}
+							>
+								{$i18n.t('Delete')}
+							</button>
+						{/if}
+
+						<button
+							class="px-3.5 py-1.5 text-sm font-medium bg-black hover:bg-gray-900 text-white dark:bg-white dark:text-black dark:hover:bg-gray-100 transition rounded-full flex flex-row space-x-1 items-center {loading
+								? ' cursor-not-allowed'
+								: ''}"
+							type="submit"
+							disabled={loading}
+						>
+							{$i18n.t('Save')}
+
+							{#if loading}
+								<div class="ml-2 self-center">
+									<svg
+										class=" w-4 h-4"
+										viewBox="0 0 24 24"
+										fill="currentColor"
+										xmlns="http://www.w3.org/2000/svg"
+										><style>
+											.spinner_ajPY {
+												transform-origin: center;
+												animation: spinner_AtaB 0.75s infinite linear;
+											}
+											@keyframes spinner_AtaB {
+												100% {
+													transform: rotate(360deg);
+												}
+											}
+										</style><path
+											d="M12,1A11,11,0,1,0,23,12,11,11,0,0,0,12,1Zm0,19a8,8,0,1,1,8-8A8,8,0,0,1,12,20Z"
+											opacity=".25"
+										/><path
+											d="M10.14,1.16a11,11,0,0,0-9,8.92A1.59,1.59,0,0,0,2.46,12,1.52,1.52,0,0,0,4.11,10.7a8,8,0,0,1,6.66-6.61A1.42,1.42,0,0,0,12,2.69h0A1.57,1.57,0,0,0,10.14,1.16Z"
+											class="spinner_ajPY"
+										/></svg
+									>
+								</div>
+							{/if}
+						</button>
+					</div>
+				</form>
+			</div>
+		</div>
+	</div>
+</Modal>

+ 11 - 11
src/lib/components/admin/Settings.svelte

@@ -71,7 +71,7 @@
 		</button>
 
 		<button
-			class="px-0.5 py-1 min-w-fit rounded-lg flex-1 md:flex-none flex text-right transition {selectedTab ===
+			class="px-0.5 py-1 min-w-fit rounded-lg flex-1 md:flex-none flex text-left transition {selectedTab ===
 			'connections'
 				? ''
 				: ' text-gray-300 dark:text-gray-600 hover:text-gray-700 dark:hover:text-white'}"
@@ -95,7 +95,7 @@
 		</button>
 
 		<button
-			class="px-0.5 py-1 min-w-fit rounded-lg flex-1 md:flex-none flex text-right transition {selectedTab ===
+			class="px-0.5 py-1 min-w-fit rounded-lg flex-1 md:flex-none flex text-left transition {selectedTab ===
 			'models'
 				? ''
 				: ' text-gray-300 dark:text-gray-600 hover:text-gray-700 dark:hover:text-white'}"
@@ -121,7 +121,7 @@
 		</button>
 
 		<button
-			class="px-0.5 py-1 min-w-fit rounded-lg flex-1 md:flex-none flex text-right transition {selectedTab ===
+			class="px-0.5 py-1 min-w-fit rounded-lg flex-1 md:flex-none flex text-left transition {selectedTab ===
 			'evaluations'
 				? ''
 				: ' text-gray-300 dark:text-gray-600 hover:text-gray-700 dark:hover:text-white'}"
@@ -136,7 +136,7 @@
 		</button>
 
 		<button
-			class="px-0.5 py-1 min-w-fit rounded-lg flex-1 md:flex-none flex text-right transition {selectedTab ===
+			class="px-0.5 py-1 min-w-fit rounded-lg flex-1 md:flex-none flex text-left transition {selectedTab ===
 			'documents'
 				? ''
 				: ' text-gray-300 dark:text-gray-600 hover:text-gray-700 dark:hover:text-white'}"
@@ -166,7 +166,7 @@
 		</button>
 
 		<button
-			class="px-0.5 py-1 min-w-fit rounded-lg flex-1 md:flex-none flex text-right transition {selectedTab ===
+			class="px-0.5 py-1 min-w-fit rounded-lg flex-1 md:flex-none flex text-left transition {selectedTab ===
 			'web'
 				? ''
 				: ' text-gray-300 dark:text-gray-600 hover:text-gray-700 dark:hover:text-white'}"
@@ -190,7 +190,7 @@
 		</button>
 
 		<button
-			class="px-0.5 py-1 min-w-fit rounded-lg flex-1 md:flex-none flex text-right transition {selectedTab ===
+			class="px-0.5 py-1 min-w-fit rounded-lg flex-1 md:flex-none flex text-left transition {selectedTab ===
 			'code-execution'
 				? ''
 				: ' text-gray-300 dark:text-gray-600 hover:text-gray-700 dark:hover:text-white'}"
@@ -216,7 +216,7 @@
 		</button>
 
 		<button
-			class="px-0.5 py-1 min-w-fit rounded-lg flex-1 md:flex-none flex text-right transition {selectedTab ===
+			class="px-0.5 py-1 min-w-fit rounded-lg flex-1 md:flex-none flex text-left transition {selectedTab ===
 			'interface'
 				? ''
 				: ' text-gray-300 dark:text-gray-600 hover:text-gray-700 dark:hover:text-white'}"
@@ -242,7 +242,7 @@
 		</button>
 
 		<button
-			class="px-0.5 py-1 min-w-fit rounded-lg flex-1 md:flex-none flex text-right transition {selectedTab ===
+			class="px-0.5 py-1 min-w-fit rounded-lg flex-1 md:flex-none flex text-left transition {selectedTab ===
 			'audio'
 				? ''
 				: ' text-gray-300 dark:text-gray-600 hover:text-gray-700 dark:hover:text-white'}"
@@ -269,7 +269,7 @@
 		</button>
 
 		<button
-			class="px-0.5 py-1 min-w-fit rounded-lg flex-1 md:flex-none flex text-right transition {selectedTab ===
+			class="px-0.5 py-1 min-w-fit rounded-lg flex-1 md:flex-none flex text-left transition {selectedTab ===
 			'images'
 				? ''
 				: ' text-gray-300 dark:text-gray-600 hover:text-gray-700 dark:hover:text-white'}"
@@ -295,7 +295,7 @@
 		</button>
 
 		<button
-			class="px-0.5 py-1 min-w-fit rounded-lg flex-1 md:flex-none flex text-right transition {selectedTab ===
+			class="px-0.5 py-1 min-w-fit rounded-lg flex-1 md:flex-none flex text-left transition {selectedTab ===
 			'pipelines'
 				? ''
 				: ' text-gray-300 dark:text-gray-600 hover:text-gray-700 dark:hover:text-white'}"
@@ -325,7 +325,7 @@
 		</button>
 
 		<button
-			class="px-0.5 py-1 min-w-fit rounded-lg flex-1 md:flex-none flex text-right transition {selectedTab ===
+			class="px-0.5 py-1 min-w-fit rounded-lg flex-1 md:flex-none flex text-left transition {selectedTab ===
 			'db'
 				? ''
 				: ' text-gray-300 dark:text-gray-600 hover:text-gray-700 dark:hover:text-white'}"

+ 13 - 1
src/lib/components/admin/Settings/Connections/OllamaConnection.svelte

@@ -5,6 +5,7 @@
 	import Tooltip from '$lib/components/common/Tooltip.svelte';
 	import SensitiveInput from '$lib/components/common/SensitiveInput.svelte';
 	import AddConnectionModal from '$lib/components/AddConnectionModal.svelte';
+	import ConfirmDialog from '$lib/components/common/ConfirmDialog.svelte';
 
 	import Cog6 from '$lib/components/icons/Cog6.svelte';
 	import Wrench from '$lib/components/icons/Wrench.svelte';
@@ -20,6 +21,7 @@
 
 	let showManageModal = false;
 	let showConfigModal = false;
+	let showDeleteConfirmDialog = false;
 </script>
 
 <AddConnectionModal
@@ -31,7 +33,9 @@
 		key: config?.key ?? '',
 		config: config
 	}}
-	{onDelete}
+	onDelete={() => {
+		showDeleteConfirmDialog = true;
+	}}
 	onSubmit={(connection) => {
 		url = connection.url;
 		config = { ...connection.config, key: connection.key };
@@ -39,6 +43,14 @@
 	}}
 />
 
+<ConfirmDialog
+	bind:show={showDeleteConfirmDialog}
+	on:confirm={() => {
+		onDelete();
+		showConfigModal = false;
+	}}
+/>
+
 <ManageOllamaModal bind:show={showManageModal} urlIdx={idx} />
 
 <div class="flex gap-1.5">

+ 12 - 1
src/lib/components/admin/Settings/Connections/OpenAIConnection.svelte

@@ -6,6 +6,7 @@
 	import SensitiveInput from '$lib/components/common/SensitiveInput.svelte';
 	import Cog6 from '$lib/components/icons/Cog6.svelte';
 	import AddConnectionModal from '$lib/components/AddConnectionModal.svelte';
+	import ConfirmDialog from '$lib/components/common/ConfirmDialog.svelte';
 
 	import { connect } from 'socket.io-client';
 
@@ -19,8 +20,16 @@
 	export let config = {};
 
 	let showConfigModal = false;
+	let showDeleteConfirmDialog = false;
 </script>
 
+<ConfirmDialog
+	bind:show={showDeleteConfirmDialog}
+	on:confirm={() => {
+		onDelete();
+	}}
+/>
+
 <AddConnectionModal
 	edit
 	bind:show={showConfigModal}
@@ -29,7 +38,9 @@
 		key,
 		config
 	}}
-	{onDelete}
+	onDelete={() => {
+		showDeleteConfirmDialog = true;
+	}}
 	onSubmit={(connection) => {
 		url = connection.url;
 		key = connection.key;

+ 155 - 107
src/lib/components/admin/Settings/Documents.svelte

@@ -49,6 +49,8 @@
 	let contentExtractionEngine = 'default';
 	let tikaServerUrl = '';
 	let showTikaServerUrl = false;
+	let doclingServerUrl = '';
+	let showDoclingServerUrl = false;
 	let documentIntelligenceEndpoint = '';
 	let documentIntelligenceKey = '';
 	let showDocumentIntelligenceConfig = false;
@@ -74,6 +76,7 @@
 		template: '',
 		r: 0.0,
 		k: 4,
+		k_reranker: 4,
 		hybrid: false
 	};
 
@@ -175,6 +178,10 @@
 			toast.error($i18n.t('Tika Server URL required.'));
 			return;
 		}
+		if (contentExtractionEngine === 'docling' && doclingServerUrl === '') {
+			toast.error($i18n.t('Docling Server URL required.'));
+			return;
+		}
 		if (
 			contentExtractionEngine === 'document_intelligence' &&
 			(documentIntelligenceEndpoint === '' || documentIntelligenceKey === '')
@@ -209,6 +216,7 @@
 			content_extraction: {
 				engine: contentExtractionEngine,
 				tika_server_url: tikaServerUrl,
+				docling_server_url: doclingServerUrl,
 				document_intelligence_config: {
 					key: documentIntelligenceKey,
 					endpoint: documentIntelligenceEndpoint
@@ -269,7 +277,10 @@
 
 			contentExtractionEngine = res.content_extraction.engine;
 			tikaServerUrl = res.content_extraction.tika_server_url;
+			doclingServerUrl = res.content_extraction.docling_server_url;
+
 			showTikaServerUrl = contentExtractionEngine === 'tika';
+			showDoclingServerUrl = contentExtractionEngine === 'docling';
 			documentIntelligenceEndpoint = res.content_extraction.document_intelligence_config.endpoint;
 			documentIntelligenceKey = res.content_extraction.document_intelligence_config.key;
 			showDocumentIntelligenceConfig = contentExtractionEngine === 'document_intelligence';
@@ -337,6 +348,7 @@
 							>
 								<option value="">{$i18n.t('Default')} </option>
 								<option value="tika">{$i18n.t('Tika')}</option>
+								<option value="docling">{$i18n.t('Docling')}</option>
 								<option value="document_intelligence">{$i18n.t('Document Intelligence')}</option>
 							</select>
 						</div>
@@ -351,6 +363,14 @@
 								/>
 							</div>
 						</div>
+					{:else if contentExtractionEngine === 'docling'}
+						<div class="flex w-full mt-1">
+							<input
+								class="flex-1 w-full rounded-lg text-sm bg-transparent outline-hidden"
+								placeholder={$i18n.t('Enter Docling Server URL')}
+								bind:value={doclingServerUrl}
+							/>
+						</div>
 					{:else if contentExtractionEngine === 'document_intelligence'}
 						<div class="my-0.5 flex gap-2 pr-2">
 							<input
@@ -387,8 +407,12 @@
 					<div class="flex items-center relative">
 						<Tooltip
 							content={BYPASS_EMBEDDING_AND_RETRIEVAL
-								? 'Inject the entire content as context for comprehensive processing, this is recommended for complex queries.'
-								: 'Default to segmented retrieval for focused and relevant content extraction, this is recommended for most cases.'}
+								? $i18n.t(
+										'Inject the entire content as context for comprehensive processing, this is recommended for complex queries.'
+									)
+								: $i18n.t(
+										'Default to segmented retrieval for focused and relevant content extraction, this is recommended for most cases.'
+									)}
 						>
 							<Switch bind:state={BYPASS_EMBEDDING_AND_RETRIEVAL} />
 						</Tooltip>
@@ -619,148 +643,172 @@
 							</div>
 						</div>
 					{/if}
+				</div>
+
+				<div class="mb-3">
+					<div class=" mb-2.5 text-base font-medium">{$i18n.t('Retrieval')}</div>
+
+					<hr class=" border-gray-100 dark:border-gray-850 my-2" />
 
 					<div class="  mb-2.5 flex w-full justify-between">
 						<div class=" self-center text-xs font-medium">{$i18n.t('Full Context Mode')}</div>
 						<div class="flex items-center relative">
 							<Tooltip
 								content={RAG_FULL_CONTEXT
-									? 'Inject entire contents as context for comprehensive processing, this is recommended for complex queries.'
-									: 'Default to segmented retrieval for focused and relevant content extraction, this is recommended for most cases.'}
+									? $i18n.t(
+											'Inject the entire content as context for comprehensive processing, this is recommended for complex queries.'
+										)
+									: $i18n.t(
+											'Default to segmented retrieval for focused and relevant content extraction, this is recommended for most cases.'
+										)}
 							>
 								<Switch bind:state={RAG_FULL_CONTEXT} />
 							</Tooltip>
 						</div>
 					</div>
 
-					<div class="  mb-2.5 flex w-full justify-between">
-						<div class=" self-center text-xs font-medium">{$i18n.t('Hybrid Search')}</div>
-						<div class="flex items-center relative">
-							<Switch
-								bind:state={querySettings.hybrid}
-								on:change={() => {
-									toggleHybridSearch();
-								}}
-							/>
+					{#if !RAG_FULL_CONTEXT}
+						<div class="  mb-2.5 flex w-full justify-between">
+							<div class=" self-center text-xs font-medium">{$i18n.t('Hybrid Search')}</div>
+							<div class="flex items-center relative">
+								<Switch
+									bind:state={querySettings.hybrid}
+									on:change={() => {
+										toggleHybridSearch();
+									}}
+								/>
+							</div>
 						</div>
-					</div>
 
-					{#if querySettings.hybrid === true}
-						<div class="  mb-2.5 flex flex-col w-full">
-							<div class=" mb-1 text-xs font-medium">{$i18n.t('Reranking Model')}</div>
+						{#if querySettings.hybrid === true}
+							<div class="  mb-2.5 flex flex-col w-full">
+								<div class=" mb-1 text-xs font-medium">{$i18n.t('Reranking Model')}</div>
+
+								<div class="">
+									<div class="flex w-full">
+										<div class="flex-1 mr-2">
+											<input
+												class="flex-1 w-full rounded-lg text-sm bg-transparent outline-hidden"
+												placeholder={$i18n.t('Set reranking model (e.g. {{model}})', {
+													model: 'BAAI/bge-reranker-v2-m3'
+												})}
+												bind:value={rerankingModel}
+											/>
+										</div>
+										<button
+											class="px-2.5 bg-transparent text-gray-800 dark:bg-transparent dark:text-gray-100 rounded-lg transition"
+											on:click={() => {
+												rerankingModelUpdateHandler();
+											}}
+											disabled={updateRerankingModelLoading}
+										>
+											{#if updateRerankingModelLoading}
+												<div class="self-center">
+													<svg
+														class=" w-4 h-4"
+														viewBox="0 0 24 24"
+														fill="currentColor"
+														xmlns="http://www.w3.org/2000/svg"
+													>
+														<style>
+															.spinner_ajPY {
+																transform-origin: center;
+																animation: spinner_AtaB 0.75s infinite linear;
+															}
 
-							<div class="">
-								<div class="flex w-full">
-									<div class="flex-1 mr-2">
-										<input
-											class="flex-1 w-full rounded-lg text-sm bg-transparent outline-hidden"
-											placeholder={$i18n.t('Set reranking model (e.g. {{model}})', {
-												model: 'BAAI/bge-reranker-v2-m3'
-											})}
-											bind:value={rerankingModel}
-										/>
-									</div>
-									<button
-										class="px-2.5 bg-transparent text-gray-800 dark:bg-transparent dark:text-gray-100 rounded-lg transition"
-										on:click={() => {
-											rerankingModelUpdateHandler();
-										}}
-										disabled={updateRerankingModelLoading}
-									>
-										{#if updateRerankingModelLoading}
-											<div class="self-center">
+															@keyframes spinner_AtaB {
+																100% {
+																	transform: rotate(360deg);
+																}
+															}
+														</style>
+														<path
+															d="M12,1A11,11,0,1,0,23,12,11,11,0,0,0,12,1Zm0,19a8,8,0,1,1,8-8A8,8,0,0,1,12,20Z"
+															opacity=".25"
+														/>
+														<path
+															d="M10.14,1.16a11,11,0,0,0-9,8.92A1.59,1.59,0,0,0,2.46,12,1.52,1.52,0,0,0,4.11,10.7a8,8,0,0,1,6.66-6.61A1.42,1.42,0,0,0,12,2.69h0A1.57,1.57,0,0,0,10.14,1.16Z"
+															class="spinner_ajPY"
+														/>
+													</svg>
+												</div>
+											{:else}
 												<svg
-													class=" w-4 h-4"
-													viewBox="0 0 24 24"
-													fill="currentColor"
 													xmlns="http://www.w3.org/2000/svg"
+													viewBox="0 0 16 16"
+													fill="currentColor"
+													class="w-4 h-4"
 												>
-													<style>
-														.spinner_ajPY {
-															transform-origin: center;
-															animation: spinner_AtaB 0.75s infinite linear;
-														}
-
-														@keyframes spinner_AtaB {
-															100% {
-																transform: rotate(360deg);
-															}
-														}
-													</style>
 													<path
-														d="M12,1A11,11,0,1,0,23,12,11,11,0,0,0,12,1Zm0,19a8,8,0,1,1,8-8A8,8,0,0,1,12,20Z"
-														opacity=".25"
+														d="M8.75 2.75a.75.75 0 0 0-1.5 0v5.69L5.03 6.22a.75.75 0 0 0-1.06 1.06l3.5 3.5a.75.75 0 0 0 1.06 0l3.5-3.5a.75.75 0 0 0-1.06-1.06L8.75 8.44V2.75Z"
 													/>
 													<path
-														d="M10.14,1.16a11,11,0,0,0-9,8.92A1.59,1.59,0,0,0,2.46,12,1.52,1.52,0,0,0,4.11,10.7a8,8,0,0,1,6.66-6.61A1.42,1.42,0,0,0,12,2.69h0A1.57,1.57,0,0,0,10.14,1.16Z"
-														class="spinner_ajPY"
+														d="M3.5 9.75a.75.75 0 0 0-1.5 0v1.5A2.75 2.75 0 0 0 4.75 14h6.5A2.75 2.75 0 0 0 14 11.25v-1.5a.75.75 0 0 0-1.5 0v1.5c0 .69-.56 1.25-1.25 1.25h-6.5c-.69 0-1.25-.56-1.25-1.25v-1.5Z"
 													/>
 												</svg>
-											</div>
-										{:else}
-											<svg
-												xmlns="http://www.w3.org/2000/svg"
-												viewBox="0 0 16 16"
-												fill="currentColor"
-												class="w-4 h-4"
-											>
-												<path
-													d="M8.75 2.75a.75.75 0 0 0-1.5 0v5.69L5.03 6.22a.75.75 0 0 0-1.06 1.06l3.5 3.5a.75.75 0 0 0 1.06 0l3.5-3.5a.75.75 0 0 0-1.06-1.06L8.75 8.44V2.75Z"
-												/>
-												<path
-													d="M3.5 9.75a.75.75 0 0 0-1.5 0v1.5A2.75 2.75 0 0 0 4.75 14h6.5A2.75 2.75 0 0 0 14 11.25v-1.5a.75.75 0 0 0-1.5 0v1.5c0 .69-.56 1.25-1.25 1.25h-6.5c-.69 0-1.25-.56-1.25-1.25v-1.5Z"
-												/>
-											</svg>
-										{/if}
-									</button>
+											{/if}
+										</button>
+									</div>
 								</div>
 							</div>
-						</div>
-					{/if}
-				</div>
-
-				<div class="mb-3">
-					<div class=" mb-2.5 text-base font-medium">{$i18n.t('Retrieval')}</div>
-
-					<hr class=" border-gray-100 dark:border-gray-850 my-2" />
+						{/if}
 
-					<div class="  mb-2.5 flex w-full justify-between">
-						<div class=" self-center text-xs font-medium">{$i18n.t('Top K')}</div>
-						<div class="flex items-center relative">
-							<input
-								class="flex-1 w-full rounded-lg text-sm bg-transparent outline-hidden"
-								type="number"
-								placeholder={$i18n.t('Enter Top K')}
-								bind:value={querySettings.k}
-								autocomplete="off"
-								min="0"
-							/>
+						<div class="  mb-2.5 flex w-full justify-between">
+							<div class=" self-center text-xs font-medium">{$i18n.t('Top K')}</div>
+							<div class="flex items-center relative">
+								<input
+									class="flex-1 w-full rounded-lg text-sm bg-transparent outline-hidden"
+									type="number"
+									placeholder={$i18n.t('Enter Top K')}
+									bind:value={querySettings.k}
+									autocomplete="off"
+									min="0"
+								/>
+							</div>
 						</div>
-					</div>
 
-					{#if querySettings.hybrid === true}
-						<div class="  mb-2.5 flex flex-col w-full justify-between">
-							<div class=" flex w-full justify-between">
-								<div class=" self-center text-xs font-medium">{$i18n.t('Minimum Score')}</div>
+						{#if querySettings.hybrid === true}
+							<div class="mb-2.5 flex w-full justify-between">
+								<div class="self-center text-xs font-medium">{$i18n.t('Top K Reranker')}</div>
 								<div class="flex items-center relative">
 									<input
 										class="flex-1 w-full rounded-lg text-sm bg-transparent outline-hidden"
 										type="number"
-										step="0.01"
-										placeholder={$i18n.t('Enter Score')}
-										bind:value={querySettings.r}
+										placeholder={$i18n.t('Enter Top K Reranker')}
+										bind:value={querySettings.k_reranker}
 										autocomplete="off"
-										min="0.0"
-										title={$i18n.t('The score should be a value between 0.0 (0%) and 1.0 (100%).')}
+										min="0"
 									/>
 								</div>
 							</div>
-							<div class="mt-1 text-xs text-gray-400 dark:text-gray-500">
-								{$i18n.t(
-									'Note: If you set a minimum score, the search will only return documents with a score greater than or equal to the minimum score.'
-								)}
+						{/if}
+
+						{#if querySettings.hybrid === true}
+							<div class="  mb-2.5 flex flex-col w-full justify-between">
+								<div class=" flex w-full justify-between">
+									<div class=" self-center text-xs font-medium">{$i18n.t('Minimum Score')}</div>
+									<div class="flex items-center relative">
+										<input
+											class="flex-1 w-full rounded-lg text-sm bg-transparent outline-hidden"
+											type="number"
+											step="0.01"
+											placeholder={$i18n.t('Enter Score')}
+											bind:value={querySettings.r}
+											autocomplete="off"
+											min="0.0"
+											title={$i18n.t(
+												'The score should be a value between 0.0 (0%) and 1.0 (100%).'
+											)}
+										/>
+									</div>
+								</div>
+								<div class="mt-1 text-xs text-gray-400 dark:text-gray-500">
+									{$i18n.t(
+										'Note: If you set a minimum score, the search will only return documents with a score greater than or equal to the minimum score.'
+									)}
+								</div>
 							</div>
-						</div>
+						{/if}
 					{/if}
 
 					<div class="  mb-2.5 flex flex-col w-full justify-between">

+ 11 - 2
src/lib/components/admin/Settings/Evaluations/ArenaModelModal.svelte

@@ -10,6 +10,7 @@
 	import PencilSolid from '$lib/components/icons/PencilSolid.svelte';
 	import { toast } from 'svelte-sonner';
 	import AccessControl from '$lib/components/workspace/common/AccessControl.svelte';
+	import ConfirmDialog from '$lib/components/common/ConfirmDialog.svelte';
 
 	export let show = false;
 	export let edit = false;
@@ -44,6 +45,7 @@
 
 	let imageInputElement;
 	let loading = false;
+	let showDeleteConfirmDialog = false;
 
 	const addModelHandler = () => {
 		if (selectedModelId) {
@@ -115,6 +117,14 @@
 	});
 </script>
 
+<ConfirmDialog
+	bind:show={showDeleteConfirmDialog}
+	on:confirm={() => {
+		dispatch('delete', model);
+		show = false;
+	}}
+/>
+
 <Modal size="sm" bind:show>
 	<div>
 		<div class=" flex justify-between dark:text-gray-100 px-5 pt-4 pb-2">
@@ -378,8 +388,7 @@
 								class="px-3.5 py-1.5 text-sm font-medium dark:bg-black dark:hover:bg-gray-950 dark:text-white bg-white text-black hover:bg-gray-100 transition rounded-full flex flex-row space-x-1 items-center"
 								type="button"
 								on:click={() => {
-									dispatch('delete', model);
-									show = false;
+									showDeleteConfirmDialog = true;
 								}}
 							>
 								{$i18n.t('Delete')}

+ 8 - 1
src/lib/components/admin/Settings/General.svelte

@@ -554,7 +554,6 @@
 													</div>
 													<input
 														class="w-full bg-transparent outline-hidden py-0.5"
-														required
 														placeholder={$i18n.t('Enter certificate path')}
 														bind:value={LDAP_SERVER.certificate_path}
 													/>
@@ -610,6 +609,14 @@
 						<Switch bind:state={adminConfig.ENABLE_CHANNELS} />
 					</div>
 
+					<div class="mb-2.5 flex w-full items-center justify-between pr-2">
+						<div class=" self-center text-xs font-medium">
+							{$i18n.t('User Webhooks')}
+						</div>
+
+						<Switch bind:state={adminConfig.ENABLE_USER_WEBHOOKS} />
+					</div>
+
 					<div class="mb-2.5 w-full justify-between">
 						<div class="flex w-full justify-between">
 							<div class=" self-center text-xs font-medium">{$i18n.t('WebUI URL')}</div>

+ 9 - 5
src/lib/components/admin/Settings/Images.svelte

@@ -191,11 +191,15 @@
 			}
 
 			if (config.comfyui.COMFYUI_WORKFLOW) {
-				config.comfyui.COMFYUI_WORKFLOW = JSON.stringify(
-					JSON.parse(config.comfyui.COMFYUI_WORKFLOW),
-					null,
-					2
-				);
+				try {
+					config.comfyui.COMFYUI_WORKFLOW = JSON.stringify(
+						JSON.parse(config.comfyui.COMFYUI_WORKFLOW),
+						null,
+						2
+					);
+				} catch (e) {
+					console.log(e);
+				}
 			}
 
 			requiredWorkflowNodes = requiredWorkflowNodes.map((node) => {

+ 135 - 35
src/lib/components/admin/Settings/Models.svelte

@@ -29,6 +29,12 @@
 	import Wrench from '$lib/components/icons/Wrench.svelte';
 	import ArrowDownTray from '$lib/components/icons/ArrowDownTray.svelte';
 	import ManageModelsModal from './Models/ManageModelsModal.svelte';
+	import ModelMenu from '$lib/components/admin/Settings/Models/ModelMenu.svelte';
+	import EllipsisHorizontal from '$lib/components/icons/EllipsisHorizontal.svelte';
+	import EyeSlash from '$lib/components/icons/EyeSlash.svelte';
+	import Eye from '$lib/components/icons/Eye.svelte';
+
+	let shiftKey = false;
 
 	let importFiles;
 	let modelsImportInputElement: HTMLInputElement;
@@ -146,8 +152,62 @@
 		);
 	};
 
+	// Toggle the `hidden` flag in a model's metadata, notify the user, and hand the
+	// updated model to upsertModelHandler.
+	const hideModelHandler = async (model) => {
+		// A missing `meta` or `meta.hidden` is treated as "not hidden", so the first toggle hides.
+		model.meta = {
+			...model.meta,
+			hidden: !(model?.meta?.hidden ?? false)
+		};
+
+		console.log(model);
+
+		// The toast is shown before upsertModelHandler is called, and that call is not awaited.
+		toast.success(
+			model.meta.hidden
+				? $i18n.t(`Model {{name}} is now hidden`, {
+						name: model.id
+					})
+				: $i18n.t(`Model {{name}} is now visible`, {
+						name: model.id
+					})
+		);
+
+		upsertModelHandler(model);
+	};
+
+	// Download a single model as a JSON file named `<model.id>-<timestamp>.json`.
+	const exportModelHandler = async (model) => {
+		// The model is wrapped in a one-element array before serialization.
+		let blob = new Blob([JSON.stringify([model])], {
+			type: 'application/json'
+		});
+		saveAs(blob, `${model.id}-${Date.now()}.json`);
+	};
+
 	onMount(async () => {
-		init();
+		await init();
+
+		// Track whether Shift is held; the model rows switch to a quick hide/show
+		// button while `shiftKey` is true.
+		const onKeyDown = (event) => {
+			if (event.key === 'Shift') {
+				shiftKey = true;
+			}
+		};
+
+		const onKeyUp = (event) => {
+			if (event.key === 'Shift') {
+				shiftKey = false;
+			}
+		};
+
+		// Reset when the window loses focus, so the flag cannot stay set if Shift is
+		// released while another window is active.
+		const onBlur = () => {
+			shiftKey = false;
+		};
+
+		window.addEventListener('keydown', onKeyDown);
+		window.addEventListener('keyup', onKeyUp);
+		// The DOM event is named 'blur'; 'blur-sm' is not an event and never fires.
+		window.addEventListener('blur', onBlur);
+
+		// NOTE(review): this callback is async, so it returns a Promise and Svelte does not
+		// use the function below as an unmount cleanup — confirm and move to onDestroy if needed.
+		return () => {
+			window.removeEventListener('keydown', onKeyDown);
+			window.removeEventListener('keyup', onKeyUp);
+			window.removeEventListener('blur', onBlur);
+		};
 	});
 </script>
 
@@ -211,7 +271,10 @@
 			{#if models.length > 0}
 				{#each filteredModels as model, modelIdx (model.id)}
 					<div
-						class=" flex space-x-4 cursor-pointer w-full px-3 py-2 dark:hover:bg-white/5 hover:bg-black/5 rounded-lg transition"
+						class=" flex space-x-4 cursor-pointer w-full px-3 py-2 dark:hover:bg-white/5 hover:bg-black/5 rounded-lg transition {model
+							?.meta?.hidden
+							? 'opacity-50 dark:opacity-50'
+							: ''}"
 						id="model-item-{model.id}"
 					>
 						<button
@@ -261,41 +324,78 @@
 							</div>
 						</button>
 						<div class="flex flex-row gap-0.5 items-center self-center">
-							<button
-								class="self-center w-fit text-sm px-2 py-2 dark:text-gray-300 dark:hover:text-white hover:bg-black/5 dark:hover:bg-white/5 rounded-xl"
-								type="button"
-								on:click={() => {
-									selectedModelId = model.id;
-								}}
-							>
-								<svg
-									xmlns="http://www.w3.org/2000/svg"
-									fill="none"
-									viewBox="0 0 24 24"
-									stroke-width="1.5"
-									stroke="currentColor"
-									class="w-4 h-4"
-								>
-									<path
-										stroke-linecap="round"
-										stroke-linejoin="round"
-										d="m16.862 4.487 1.687-1.688a1.875 1.875 0 1 1 2.652 2.652L6.832 19.82a4.5 4.5 0 0 1-1.897 1.13l-2.685.8.8-2.685a4.5 4.5 0 0 1 1.13-1.897L16.863 4.487Zm0 0L19.5 7.125"
-									/>
-								</svg>
-							</button>
-
-							<div class="ml-1">
-								<Tooltip
-									content={(model?.is_active ?? true) ? $i18n.t('Enabled') : $i18n.t('Disabled')}
-								>
-									<Switch
-										bind:state={model.is_active}
-										on:change={async () => {
-											toggleModelHandler(model);
+							{#if shiftKey}
+								<Tooltip content={model?.meta?.hidden ? $i18n.t('Show') : $i18n.t('Hide')}>
+									<button
+										class="self-center w-fit text-sm px-2 py-2 dark:text-gray-300 dark:hover:text-white hover:bg-black/5 dark:hover:bg-white/5 rounded-xl"
+										type="button"
+										on:click={() => {
+											hideModelHandler(model);
 										}}
-									/>
+									>
+										{#if model?.meta?.hidden}
+											<EyeSlash />
+										{:else}
+											<Eye />
+										{/if}
+									</button>
 								</Tooltip>
-							</div>
+							{:else}
+								<button
+									class="self-center w-fit text-sm px-2 py-2 dark:text-gray-300 dark:hover:text-white hover:bg-black/5 dark:hover:bg-white/5 rounded-xl"
+									type="button"
+									on:click={() => {
+										selectedModelId = model.id;
+									}}
+								>
+									<svg
+										xmlns="http://www.w3.org/2000/svg"
+										fill="none"
+										viewBox="0 0 24 24"
+										stroke-width="1.5"
+										stroke="currentColor"
+										class="w-4 h-4"
+									>
+										<path
+											stroke-linecap="round"
+											stroke-linejoin="round"
+											d="m16.862 4.487 1.687-1.688a1.875 1.875 0 1 1 2.652 2.652L6.832 19.82a4.5 4.5 0 0 1-1.897 1.13l-2.685.8.8-2.685a4.5 4.5 0 0 1 1.13-1.897L16.863 4.487Zm0 0L19.5 7.125"
+										/>
+									</svg>
+								</button>
+
+								<ModelMenu
+									user={$user}
+									{model}
+									exportHandler={() => {
+										exportModelHandler(model);
+									}}
+									hideHandler={() => {
+										hideModelHandler(model);
+									}}
+									onClose={() => {}}
+								>
+									<button
+										class="self-center w-fit text-sm p-1.5 dark:text-gray-300 dark:hover:text-white hover:bg-black/5 dark:hover:bg-white/5 rounded-xl"
+										type="button"
+									>
+										<EllipsisHorizontal className="size-5" />
+									</button>
+								</ModelMenu>
+
+								<div class="ml-1">
+									<Tooltip
+										content={(model?.is_active ?? true) ? $i18n.t('Enabled') : $i18n.t('Disabled')}
+									>
+										<Switch
+											bind:state={model.is_active}
+											on:change={async () => {
+												toggleModelHandler(model);
+											}}
+										/>
+									</Tooltip>
+								</div>
+							{/if}
 						</div>
 					</div>
 				{/each}

+ 2 - 1
src/lib/components/admin/Settings/Models/ModelList.svelte

@@ -33,6 +33,7 @@
 		if (modelListElement) {
 			sortable = Sortable.create(modelListElement, {
 				animation: 150,
+				handle: '.item-handle',
 				onUpdate: async (event) => {
 					positionChangeHandler();
 				}
@@ -47,7 +48,7 @@
 			<div class=" flex gap-2 w-full justify-between items-center" id="model-item-{modelId}">
 				<Tooltip content={modelId} placement="top-start">
 					<div class="flex items-center gap-1">
-						<EllipsisVertical className="size-4 cursor-move" />
+						<EllipsisVertical className="size-4 cursor-move item-handle" />
 
 						<div class=" text-sm flex-1 py-1 rounded-lg">
 							{#if $models.find((model) => model.id === modelId)}

+ 116 - 0
src/lib/components/admin/Settings/Models/ModelMenu.svelte

@@ -0,0 +1,116 @@
+<script lang="ts">
+	// Dropdown menu for a model row, offering "Hide/Show Model" and "Export" items.
+	import { DropdownMenu } from 'bits-ui';
+	import { flyAndScale } from '$lib/utils/transitions';
+	import { getContext } from 'svelte';
+
+	// NOTE(review): GarbageBin, Pencil, Tags, Share, ArchiveBox, DocumentDuplicate,
+	// ArrowUpCircle and `config` are not referenced anywhere in this file.
+	import Dropdown from '$lib/components/common/Dropdown.svelte';
+	import GarbageBin from '$lib/components/icons/GarbageBin.svelte';
+	import Pencil from '$lib/components/icons/Pencil.svelte';
+	import Tooltip from '$lib/components/common/Tooltip.svelte';
+	import Tags from '$lib/components/chat/Tags.svelte';
+	import Share from '$lib/components/icons/Share.svelte';
+	import ArchiveBox from '$lib/components/icons/ArchiveBox.svelte';
+	import DocumentDuplicate from '$lib/components/icons/DocumentDuplicate.svelte';
+	import ArrowDownTray from '$lib/components/icons/ArrowDownTray.svelte';
+	import ArrowUpCircle from '$lib/components/icons/ArrowUpCircle.svelte';
+
+	import { config } from '$lib/stores';
+
+	const i18n = getContext('i18n');
+
+	// Accepted as a prop but not read in this component.
+	export let user;
+	// Model for this menu; `model.meta.hidden` selects the Show/Hide icon and label.
+	export let model;
+
+	// Called with no arguments when the "Export" item is clicked.
+	export let exportHandler: Function;
+	// Called with no arguments when the "Hide Model"/"Show Model" item is clicked.
+	export let hideHandler: Function;
+
+	// Called when the dropdown emits a `change` event whose detail is `false`.
+	export let onClose: Function;
+
+	// Open state of the dropdown, bound to <Dropdown>.
+	let show = false;
+</script>
+
+<Dropdown
+	bind:show
+	on:change={(e) => {
+		if (e.detail === false) {
+			onClose();
+		}
+	}}
+>
+	<Tooltip content={$i18n.t('More')}>
+		<slot />
+	</Tooltip>
+
+	<div slot="content">
+		<DropdownMenu.Content
+			class="w-full max-w-[160px] rounded-xl px-1 py-1.5 border border-gray-300/30 dark:border-gray-700/50 z-50 bg-white dark:bg-gray-850 dark:text-white shadow-sm"
+			sideOffset={-2}
+			side="bottom"
+			align="start"
+			transition={flyAndScale}
+		>
+			<DropdownMenu.Item
+				class="flex  gap-2  items-center px-3 py-2 text-sm  font-medium cursor-pointer hover:bg-gray-50 dark:hover:bg-gray-800 rounded-md"
+				on:click={() => {
+					hideHandler();
+				}}
+			>
+				{#if model?.meta?.hidden ?? false}
+					<svg
+						xmlns="http://www.w3.org/2000/svg"
+						fill="none"
+						viewBox="0 0 24 24"
+						stroke-width="1.5"
+						stroke="currentColor"
+						class="size-4"
+					>
+						<path
+							stroke-linecap="round"
+							stroke-linejoin="round"
+							d="M3.98 8.223A10.477 10.477 0 0 0 1.934 12C3.226 16.338 7.244 19.5 12 19.5c.993 0 1.953-.138 2.863-.395M6.228 6.228A10.451 10.451 0 0 1 12 4.5c4.756 0 8.773 3.162 10.065 7.498a10.522 10.522 0 0 1-4.293 5.774M6.228 6.228 3 3m3.228 3.228 3.65 3.65m7.894 7.894L21 21m-3.228-3.228-3.65-3.65m0 0a3 3 0 1 0-4.243-4.243m4.242 4.242L9.88 9.88"
+						/>
+					</svg>
+				{:else}
+					<svg
+						xmlns="http://www.w3.org/2000/svg"
+						fill="none"
+						viewBox="0 0 24 24"
+						stroke-width="1.5"
+						stroke="currentColor"
+						class="size-4"
+					>
+						<path
+							stroke-linecap="round"
+							stroke-linejoin="round"
+							d="M2.036 12.322a1.012 1.012 0 0 1 0-.639C3.423 7.51 7.36 4.5 12 4.5c4.638 0 8.573 3.007 9.963 7.178.07.207.07.431 0 .639C20.577 16.49 16.64 19.5 12 19.5c-4.638 0-8.573-3.007-9.963-7.178Z"
+						/>
+						<path
+							stroke-linecap="round"
+							stroke-linejoin="round"
+							d="M15 12a3 3 0 1 1-6 0 3 3 0 0 1 6 0Z"
+						/>
+					</svg>
+				{/if}
+
+				<div class="flex items-center">
+					{#if model?.meta?.hidden ?? false}
+						{$i18n.t('Show Model')}
+					{:else}
+						{$i18n.t('Hide Model')}
+					{/if}
+				</div>
+			</DropdownMenu.Item>
+
+			<DropdownMenu.Item
+				class="flex gap-2 items-center px-3 py-2 text-sm  font-medium cursor-pointer hover:bg-gray-50 dark:hover:bg-gray-800 rounded-md"
+				on:click={() => {
+					exportHandler();
+				}}
+			>
+				<ArrowDownTray />
+
+				<div class="flex items-center">{$i18n.t('Export')}</div>
+			</DropdownMenu.Item>
+		</DropdownMenu.Content>
+	</div>
+</Dropdown>

+ 6 - 2
src/lib/components/admin/Settings/WebSearch.svelte

@@ -462,8 +462,12 @@
 						<div class="flex items-center relative">
 							<Tooltip
 								content={webConfig.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL
-									? 'Inject the entire content as context for comprehensive processing, this is recommended for complex queries.'
-									: 'Default to segmented retrieval for focused and relevant content extraction, this is recommended for most cases.'}
+									? $i18n.t(
+											'Inject the entire content as context for comprehensive processing, this is recommended for complex queries.'
+										)
+									: $i18n.t(
+											'Default to segmented retrieval for focused and relevant content extraction, this is recommended for most cases.'
+										)}
 							>
 								<Switch bind:state={webConfig.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL} />
 							</Tooltip>

+ 8 - 1
src/lib/components/admin/Users/Groups.svelte

@@ -52,12 +52,19 @@
 			prompts: false,
 			tools: false
 		},
+		sharing: {
+			public_models: false,
+			public_knowledge: false,
+			public_prompts: false,
+			public_tools: false
+		},
 		chat: {
 			controls: true,
 			file_upload: true,
 			delete: true,
 			edit: true,
-			temporary: true
+			temporary: true,
+			temporary_enforced: true
 		},
 		features: {
 			web_search: true,

+ 14 - 3
src/lib/components/admin/Users/Groups/EditGroupModal.svelte

@@ -9,6 +9,7 @@
 	import Users from './Users.svelte';
 	import UserPlusSolid from '$lib/components/icons/UserPlusSolid.svelte';
 	import WrenchSolid from '$lib/components/icons/WrenchSolid.svelte';
+	import ConfirmDialog from '$lib/components/common/ConfirmDialog.svelte';
 
 	export let onSubmit: Function = () => {};
 	export let onDelete: Function = () => {};
@@ -25,6 +26,7 @@
 
 	let selectedTab = 'general';
 	let loading = false;
+	let showDeleteConfirmDialog = false;
 
 	export let name = '';
 	export let description = '';
@@ -88,6 +90,14 @@
 	});
 </script>
 
+<ConfirmDialog
+	bind:show={showDeleteConfirmDialog}
+	on:confirm={() => {
+		onDelete();
+		show = false;
+	}}
+/>
+
 <Modal size="md" bind:show>
 	<div>
 		<div class=" flex justify-between dark:text-gray-100 px-5 pt-4 mb-1.5">
@@ -263,18 +273,19 @@
 						{/if}
 					</div> -->
 
-					<div class="flex justify-end pt-3 text-sm font-medium gap-1.5">
+					<div class="flex justify-between pt-3 text-sm font-medium gap-1.5">
 						{#if edit}
 							<button
 								class="px-3.5 py-1.5 text-sm font-medium dark:bg-black dark:hover:bg-gray-900 dark:text-white bg-white text-black hover:bg-gray-100 transition rounded-full flex flex-row space-x-1 items-center"
 								type="button"
 								on:click={() => {
-									onDelete();
-									show = false;
+									showDeleteConfirmDialog = true;
 								}}
 							>
 								{$i18n.t('Delete')}
 							</button>
+						{:else}
+							<div></div>
 						{/if}
 
 						<button

+ 53 - 1
src/lib/components/admin/Users/Groups/Permissions.svelte

@@ -13,12 +13,19 @@
 			prompts: false,
 			tools: false
 		},
+		sharing: {
+			public_models: false,
+			public_knowledge: false,
+			public_prompts: false,
+			public_tools: false
+		},
 		chat: {
 			controls: true,
 			delete: true,
 			edit: true,
+			file_upload: true,
 			temporary: true,
-			file_upload: true
+			temporary_enforced: true
 		},
 		features: {
 			web_search: true,
@@ -39,6 +46,7 @@
 			...defaults,
 			...obj,
 			workspace: { ...defaults.workspace, ...obj.workspace },
+			sharing: { ...defaults.sharing, ...obj.sharing },
 			chat: { ...defaults.chat, ...obj.chat },
 			features: { ...defaults.features, ...obj.features }
 		};
@@ -194,6 +202,40 @@
 
 	<hr class=" border-gray-100 dark:border-gray-850 my-2" />
 
+	<div>
+		<div class=" mb-2 text-sm font-medium">{$i18n.t('Sharing Permissions')}</div>
+
+		<div class="  flex w-full justify-between my-2 pr-2">
+			<div class=" self-center text-xs font-medium">
+				{$i18n.t('Models Public Sharing')}
+			</div>
+			<Switch bind:state={permissions.sharing.public_models} />
+		</div>
+
+		<div class="  flex w-full justify-between my-2 pr-2">
+			<div class=" self-center text-xs font-medium">
+				{$i18n.t('Knowledge Public Sharing')}
+			</div>
+			<Switch bind:state={permissions.sharing.public_knowledge} />
+		</div>
+
+		<div class="  flex w-full justify-between my-2 pr-2">
+			<div class=" self-center text-xs font-medium">
+				{$i18n.t('Prompts Public Sharing')}
+			</div>
+			<Switch bind:state={permissions.sharing.public_prompts} />
+		</div>
+
+		<div class="  flex w-full justify-between my-2 pr-2">
+			<div class=" self-center text-xs font-medium">
+				{$i18n.t('Tools Public Sharing')}
+			</div>
+			<Switch bind:state={permissions.sharing.public_tools} />
+		</div>
+	</div>
+
+	<hr class=" border-gray-100 dark:border-gray-850 my-2" />
+
 	<div>
 		<div class=" mb-2 text-sm font-medium">{$i18n.t('Chat Permissions')}</div>
 
@@ -236,6 +278,16 @@
 
 			<Switch bind:state={permissions.chat.temporary} />
 		</div>
+
+		{#if permissions.chat.temporary}
+			<div class="  flex w-full justify-between my-2 pr-2">
+				<div class=" self-center text-xs font-medium">
+					{$i18n.t('Enforce Temporary Chat')}
+				</div>
+
+				<Switch bind:state={permissions.chat.temporary_enforced} />
+			</div>
+		{/if}
 	</div>
 
 	<hr class=" border-gray-100 dark:border-gray-850 my-2" />

+ 15 - 1
src/lib/components/admin/Users/UserList/UserChatsModal.svelte

@@ -12,6 +12,7 @@
 	import Modal from '$lib/components/common/Modal.svelte';
 	import Tooltip from '$lib/components/common/Tooltip.svelte';
 	import Spinner from '$lib/components/common/Spinner.svelte';
+	import ConfirmDialog from '$lib/components/common/ConfirmDialog.svelte';
 
 	const i18n = getContext('i18n');
 
@@ -19,6 +20,8 @@
 	export let user;
 
 	let chats = null;
+	let showDeleteConfirmDialog = false;
+	let chatToDelete = null;
 
 	const deleteChatHandler = async (chatId) => {
 		const res = await deleteChatById(localStorage.token, chatId).catch((error) => {
@@ -50,6 +53,16 @@
 	}
 </script>
 
+<ConfirmDialog
+	bind:show={showDeleteConfirmDialog}
+	on:confirm={() => {
+		if (chatToDelete) {
+			deleteChatHandler(chatToDelete);
+			chatToDelete = null;
+		}
+	}}
+/>
+
 <Modal size="lg" bind:show>
 	<div class=" flex justify-between dark:text-gray-300 px-5 pt-4">
 		<div class=" text-lg font-medium self-center capitalize">
@@ -142,7 +155,8 @@
 														<button
 															class="self-center w-fit text-sm px-2 py-2 hover:bg-black/5 dark:hover:bg-white/5 rounded-xl"
 															on:click={async () => {
-																deleteChatHandler(chat.id);
+																chatToDelete = chat.id;
+																showDeleteConfirmDialog = true;
 															}}
 														>
 															<svg

+ 7 - 4
src/lib/components/channel/Messages.svelte

@@ -73,10 +73,13 @@
 						<div class="text-2xl font-medium capitalize">{channel.name}</div>
 
 						<div class=" text-gray-500">
-							This channel was created on {dayjs(channel.created_at / 1000000).format(
-								'MMMM D, YYYY'
-							)}. This is the very beginning of the {channel.name}
-							channel.
+							{$i18n.t(
+								'This channel was created on {{createdAt}}. This is the very beginning of the {{channelName}} channel.',
+								{
+									createdAt: dayjs(channel.created_at / 1000000).format('MMMM D, YYYY'),
+									channelName: channel.name
+								}
+							)}
 						</div>
 					</div>
 				{:else}

+ 37 - 24
src/lib/components/chat/Chat.svelte

@@ -35,7 +35,8 @@
 		showOverview,
 		chatTitle,
 		showArtifacts,
-		tools
+		tools,
+		toolServers
 	} from '$lib/stores';
 	import {
 		convertMessagesToHistory,
@@ -119,6 +120,7 @@
 	let imageGenerationEnabled = false;
 	let webSearchEnabled = false;
 	let codeInterpreterEnabled = false;
+
 	let chat = null;
 	let tags = [];
 
@@ -212,7 +214,14 @@
 		const _chatId = JSON.parse(JSON.stringify($chatId));
 		let _messageId = JSON.parse(JSON.stringify(message.id));
 
-		let messageChildrenIds = history.messages[_messageId].childrenIds;
+		let messageChildrenIds = [];
+		if (_messageId === null) {
+			messageChildrenIds = Object.keys(history.messages).filter(
+				(id) => history.messages[id].parentId === null
+			);
+		} else {
+			messageChildrenIds = history.messages[_messageId].childrenIds;
+		}
 
 		while (messageChildrenIds.length !== 0) {
 			_messageId = messageChildrenIds.at(-1);
@@ -286,18 +295,10 @@
 				} else if (type === 'chat:tags') {
 					chat = await getChatById(localStorage.token, $chatId);
 					allTags.set(await getAllTags(localStorage.token));
-				} else if (type === 'message') {
+				} else if (type === 'chat:message:delta' || type === 'message') {
 					message.content += data.content;
-				} else if (type === 'replace') {
+				} else if (type === 'chat:message' || type === 'replace') {
 					message.content = data.content;
-				} else if (type === 'action') {
-					if (data.action === 'continue') {
-						const continueButton = document.getElementById('continue-response-button');
-
-						if (continueButton) {
-							continueButton.click();
-						}
-					}
 				} else if (type === 'confirmation') {
 					eventCallback = cb;
 
@@ -384,7 +385,7 @@
 		if (event.data.type === 'input:prompt:submit') {
 			console.debug(event.data.text);
 
-			if (prompt !== '') {
+			if (event.data.text !== '') {
 				await tick();
 				submitPrompt(event.data.text);
 			}
@@ -887,6 +888,8 @@
 				await chats.set(await getChatList(localStorage.token, $currentChatPage));
 			}
 		}
+
+		taskId = null;
 	};
 
 	const chatActionHandler = async (chatId, actionId, modelId, responseMessageId, event = null) => {
@@ -1276,12 +1279,13 @@
 		prompt = '';
 
 		// Reset chat input textarea
-		const chatInputElement = document.getElementById('chat-input');
+		if (!($settings?.richTextInput ?? true)) {
+			const chatInputElement = document.getElementById('chat-input');
 
-		if (chatInputElement) {
-			await tick();
-			chatInputElement.style.height = '';
-			chatInputElement.style.height = Math.min(chatInputElement.scrollHeight, 320) + 'px';
+			if (chatInputElement) {
+				await tick();
+				chatInputElement.style.height = '';
+			}
 		}
 
 		const _files = JSON.parse(JSON.stringify(files));
@@ -1563,6 +1567,7 @@
 
 				files: (files?.length ?? 0) > 0 ? files : undefined,
 				tool_ids: selectedToolIds.length > 0 ? selectedToolIds : undefined,
+				tool_servers: $toolServers,
 
 				features: {
 					image_generation:
@@ -1621,7 +1626,7 @@
 					: {})
 			},
 			`${WEBUI_BASE_URL}/api`
-		).catch((error) => {
+		).catch(async (error) => {
 			toast.error(`${error}`);
 
 			responseMessage.error = {
@@ -1634,10 +1639,12 @@
 			return null;
 		});
 
-		console.log(res);
-
 		if (res) {
-			taskId = res.task_id;
+			if (res.error) {
+				await handleOpenAIError(res.error, responseMessage);
+			} else {
+				taskId = res.task_id;
+			}
 		}
 
 		await tick();
@@ -1654,9 +1661,11 @@
 
 		console.error(innerError);
 		if ('detail' in innerError) {
+			// FastAPI error
 			toast.error(innerError.detail);
 			errorMessage = innerError.detail;
 		} else if ('error' in innerError) {
+			// OpenAI error
 			if ('message' in innerError.error) {
 				toast.error(innerError.error.message);
 				errorMessage = innerError.error.message;
@@ -1665,6 +1674,7 @@
 				errorMessage = innerError.error;
 			}
 		} else if ('message' in innerError) {
+			// OpenAI error
 			toast.error(innerError.message);
 			errorMessage = innerError.message;
 		}
@@ -1683,9 +1693,10 @@
 		history.messages[responseMessage.id] = responseMessage;
 	};
 
-	const stopResponse = () => {
+	const stopResponse = async () => {
 		if (taskId) {
-			const res = stopTask(localStorage.token, taskId).catch((error) => {
+			const res = await stopTask(localStorage.token, taskId).catch((error) => {
+				toast.error(`${error}`);
 				return null;
 			});
 
@@ -2031,6 +2042,7 @@
 								bind:codeInterpreterEnabled
 								bind:webSearchEnabled
 								bind:atSelectedModel
+								toolServers={$toolServers}
 								transparentBackground={$settings?.backgroundImageUrl ?? false}
 								{stopResponse}
 								{createMessagePair}
@@ -2084,6 +2096,7 @@
 								bind:webSearchEnabled
 								bind:atSelectedModel
 								transparentBackground={$settings?.backgroundImageUrl ?? false}
+								toolServers={$toolServers}
 								{stopResponse}
 								{createMessagePair}
 								on:upload={async (e) => {

+ 1 - 1
src/lib/components/chat/ContentRenderer/FloatingButtons.svelte

@@ -263,7 +263,7 @@
 			</div>
 		{:else}
 			<div
-				class="py-1 flex dark:text-gray-100 bg-gray-50 dark:bg-gray-800 border dark:border-gray-850 w-72 rounded-full shadow-xl"
+				class="py-1 flex dark:text-gray-100 bg-gray-50 dark:bg-gray-800 border border-gray-100 dark:border-gray-850 w-72 rounded-full shadow-xl"
 			>
 				<input
 					type="text"

+ 36 - 40
src/lib/components/chat/Controls/Controls.svelte

@@ -30,45 +30,45 @@
 		</button>
 	</div>
 
-	{#if $user.role === 'admin' || $user?.permissions.chat?.controls}
-		<div class=" dark:text-gray-200 text-sm font-primary py-0.5 px-0.5">
-			{#if chatFiles.length > 0}
-				<Collapsible title={$i18n.t('Files')} open={true} buttonClassName="w-full">
-					<div class="flex flex-col gap-1 mt-1.5" slot="content">
-						{#each chatFiles as file, fileIdx}
-							<FileItem
-								className="w-full"
-								item={file}
-								edit={true}
-								url={file?.url ? file.url : null}
-								name={file.name}
-								type={file.type}
-								size={file?.size}
-								dismissible={true}
-								on:dismiss={() => {
-									// Remove the file from the chatFiles array
+	<div class=" dark:text-gray-200 text-sm font-primary py-0.5 px-0.5">
+		{#if chatFiles.length > 0}
+			<Collapsible title={$i18n.t('Files')} open={true} buttonClassName="w-full">
+				<div class="flex flex-col gap-1 mt-1.5" slot="content">
+					{#each chatFiles as file, fileIdx}
+						<FileItem
+							className="w-full"
+							item={file}
+							edit={true}
+							url={file?.url ? file.url : null}
+							name={file.name}
+							type={file.type}
+							size={file?.size}
+							dismissible={true}
+							on:dismiss={() => {
+								// Remove the file from the chatFiles array
 
-									chatFiles.splice(fileIdx, 1);
-									chatFiles = chatFiles;
-								}}
-								on:click={() => {
-									console.log(file);
-								}}
-							/>
-						{/each}
-					</div>
-				</Collapsible>
-
-				<hr class="my-2 border-gray-50 dark:border-gray-700/10" />
-			{/if}
-
-			<Collapsible bind:open={showValves} title={$i18n.t('Valves')} buttonClassName="w-full">
-				<div class="text-sm" slot="content">
-					<Valves show={showValves} />
+								chatFiles.splice(fileIdx, 1);
+								chatFiles = chatFiles;
+							}}
+							on:click={() => {
+								console.log(file);
+							}}
+						/>
+					{/each}
 				</div>
 			</Collapsible>
 
 			<hr class="my-2 border-gray-50 dark:border-gray-700/10" />
+		{/if}
+
+		<Collapsible bind:open={showValves} title={$i18n.t('Valves')} buttonClassName="w-full">
+			<div class="text-sm" slot="content">
+				<Valves show={showValves} />
+			</div>
+		</Collapsible>
+
+		{#if $user.role === 'admin' || $user?.permissions.chat?.controls}
+			<hr class="my-2 border-gray-50 dark:border-gray-700/10" />
 
 			<Collapsible title={$i18n.t('System Prompt')} open={true} buttonClassName="w-full">
 				<div class="" slot="content">
@@ -90,10 +90,6 @@
 					</div>
 				</div>
 			</Collapsible>
-		</div>
-	{:else}
-		<div class="text-sm dark:text-gray-300 text-center py-2 px-10">
-			{$i18n.t('You do not have permission to access this feature.')}
-		</div>
-	{/if}
+		{/if}
+	</div>
 </div>

+ 57 - 58
src/lib/components/chat/MessageInput.svelte

@@ -46,6 +46,7 @@
 	import Photo from '../icons/Photo.svelte';
 	import CommandLine from '../icons/CommandLine.svelte';
 	import { KokoroWorker } from '$lib/workers/KokoroWorker';
+	import ToolServersModal from './ToolServersModal.svelte';
 
 	const i18n = getContext('i18n');
 
@@ -68,6 +69,8 @@
 	export let prompt = '';
 	export let files = [];
 
+	export let toolServers = [];
+
 	export let selectedToolIds = [];
 
 	export let imageGenerationEnabled = false;
@@ -82,6 +85,8 @@
 		webSearchEnabled
 	});
 
+	let showToolServers = false;
+
 	let loaded = false;
 	let recording = false;
 
@@ -343,6 +348,8 @@
 
 <FilesOverlay show={dragged} />
 
+<ToolServersModal bind:show={showToolServers} />
+
 {#if loaded}
 	<div class="w-full font-primary">
 		<div class=" mx-auto inset-x-0 bg-transparent flex justify-center">
@@ -417,54 +424,6 @@
 								</div>
 							{/if}
 
-							{#if webSearchEnabled || ($config?.features?.enable_web_search && ($settings?.webSearch ?? false)) === 'always'}
-								<div class="flex items-center justify-between w-full">
-									<div class="flex items-center gap-2.5 text-sm dark:text-gray-500">
-										<div class="pl-1">
-											<span class="relative flex size-2">
-												<span
-													class="animate-ping absolute inline-flex h-full w-full rounded-full bg-blue-400 opacity-75"
-												/>
-												<span class="relative inline-flex rounded-full size-2 bg-blue-500" />
-											</span>
-										</div>
-										<div class=" translate-y-[0.5px]">{$i18n.t('Search the internet')}</div>
-									</div>
-								</div>
-							{/if}
-
-							{#if imageGenerationEnabled}
-								<div class="flex items-center justify-between w-full">
-									<div class="flex items-center gap-2.5 text-sm dark:text-gray-500">
-										<div class="pl-1">
-											<span class="relative flex size-2">
-												<span
-													class="animate-ping absolute inline-flex h-full w-full rounded-full bg-teal-400 opacity-75"
-												/>
-												<span class="relative inline-flex rounded-full size-2 bg-teal-500" />
-											</span>
-										</div>
-										<div class=" translate-y-[0.5px]">{$i18n.t('Generate an image')}</div>
-									</div>
-								</div>
-							{/if}
-
-							{#if codeInterpreterEnabled}
-								<div class="flex items-center justify-between w-full">
-									<div class="flex items-center gap-2.5 text-sm dark:text-gray-500">
-										<div class="pl-1">
-											<span class="relative flex size-2">
-												<span
-													class="animate-ping absolute inline-flex h-full w-full rounded-full bg-green-400 opacity-75"
-												/>
-												<span class="relative inline-flex rounded-full size-2 bg-green-500" />
-											</span>
-										</div>
-										<div class=" translate-y-[0.5px]">{$i18n.t('Execute code for analysis')}</div>
-									</div>
-								</div>
-							{/if}
-
 							{#if atSelectedModel !== undefined}
 								<div class="flex items-center justify-between w-full">
 									<div class="pl-[1px] flex items-center gap-2 text-sm dark:text-gray-500">
@@ -576,7 +535,7 @@
 							}}
 						>
 							<div
-								class="flex-1 flex flex-col relative w-full rounded-3xl px-1 bg-gray-600/5 dark:bg-gray-400/5 dark:text-gray-100"
+								class="flex-1 flex flex-col relative w-full shadow-lg rounded-3xl border border-gray-100 dark:border-gray-850 hover:border-gray-200 focus-within:border-gray-200 hover:dark:border-gray-800 focus-within:dark:border-gray-800 transition px-1 bg-white/90 dark:bg-gray-400/5 dark:text-gray-100"
 								dir={$settings?.chatDirection ?? 'LTR'}
 							>
 								{#if files.length > 0}
@@ -687,7 +646,8 @@
 														))}
 												placeholder={placeholder ? placeholder : $i18n.t('Send a Message')}
 												largeTextAsFile={$settings?.largeTextAsFile ?? false}
-												autocomplete={$config?.features.enable_autocomplete_generation}
+												autocomplete={$config?.features?.enable_autocomplete_generation &&
+													($settings?.promptAutocomplete ?? false)}
 												generateAutoCompletion={async (text) => {
 													if (selectedModelIds.length === 0 || !selectedModelIds.at(0)) {
 														toast.error($i18n.t('Please select a model first.'));
@@ -895,7 +855,6 @@
 											on:keydown={async (e) => {
 												const isCtrlPressed = e.ctrlKey || e.metaKey; // metaKey is for Cmd key on Mac
 
-												console.log('keydown', e);
 												const commandsContainerElement =
 													document.getElementById('commands-container');
 
@@ -997,7 +956,6 @@
 															return;
 														}
 
-														console.log('keypress', e);
 														// Prevent Enter key from creating a new line
 														const isCtrlPressed = e.ctrlKey || e.metaKey;
 														const enterPressed =
@@ -1175,14 +1133,14 @@
 														<button
 															on:click|preventDefault={() => (webSearchEnabled = !webSearchEnabled)}
 															type="button"
-															class="px-1.5 @sm:px-2.5 py-1.5 flex gap-1.5 items-center text-sm rounded-full font-medium transition-colors duration-300 focus:outline-hidden max-w-full overflow-hidden {webSearchEnabled ||
+															class="px-1.5 @xl:px-2.5 py-1.5 flex gap-1.5 items-center text-sm rounded-full font-medium transition-colors duration-300 focus:outline-hidden max-w-full overflow-hidden {webSearchEnabled ||
 															($settings?.webSearch ?? false) === 'always'
 																? 'bg-blue-100 dark:bg-blue-500/20 text-blue-500 dark:text-blue-400'
 																: 'bg-transparent text-gray-600 dark:text-gray-300 border-gray-200 hover:bg-gray-100 dark:hover:bg-gray-800'}"
 														>
 															<GlobeAlt className="size-5" strokeWidth="1.75" />
 															<span
-																class="hidden @sm:block whitespace-nowrap overflow-hidden text-ellipsis translate-y-[0.5px] mr-0.5"
+																class="hidden @xl:block whitespace-nowrap overflow-hidden text-ellipsis translate-y-[0.5px] mr-0.5"
 																>{$i18n.t('Web Search')}</span
 															>
 														</button>
@@ -1195,13 +1153,13 @@
 															on:click|preventDefault={() =>
 																(imageGenerationEnabled = !imageGenerationEnabled)}
 															type="button"
-															class="px-1.5 @sm:px-2.5 py-1.5 flex gap-1.5 items-center text-sm rounded-full font-medium transition-colors duration-300 focus:outline-hidden max-w-full overflow-hidden {imageGenerationEnabled
+															class="px-1.5 @xl:px-2.5 py-1.5 flex gap-1.5 items-center text-sm rounded-full font-medium transition-colors duration-300 focus:outline-hidden max-w-full overflow-hidden {imageGenerationEnabled
 																? 'bg-gray-100 dark:bg-gray-500/20 text-gray-600 dark:text-gray-400'
 																: 'bg-transparent text-gray-600 dark:text-gray-300 border-gray-200 hover:bg-gray-100 dark:hover:bg-gray-800 '}"
 														>
 															<Photo className="size-5" strokeWidth="1.75" />
 															<span
-																class="hidden @sm:block whitespace-nowrap overflow-hidden text-ellipsis translate-y-[0.5px] mr-0.5"
+																class="hidden @xl:block whitespace-nowrap overflow-hidden text-ellipsis translate-y-[0.5px] mr-0.5"
 																>{$i18n.t('Image')}</span
 															>
 														</button>
@@ -1214,13 +1172,13 @@
 															on:click|preventDefault={() =>
 																(codeInterpreterEnabled = !codeInterpreterEnabled)}
 															type="button"
-															class="px-1.5 @sm:px-2.5 py-1.5 flex gap-1.5 items-center text-sm rounded-full font-medium transition-colors duration-300 focus:outline-hidden max-w-full overflow-hidden {codeInterpreterEnabled
+															class="px-1.5 @xl:px-2.5 py-1.5 flex gap-1.5 items-center text-sm rounded-full font-medium transition-colors duration-300 focus:outline-hidden max-w-full overflow-hidden {codeInterpreterEnabled
 																? 'bg-gray-100 dark:bg-gray-500/20 text-gray-600 dark:text-gray-400'
 																: 'bg-transparent text-gray-600 dark:text-gray-300 border-gray-200 hover:bg-gray-100 dark:hover:bg-gray-800 '}"
 														>
 															<CommandLine className="size-5" strokeWidth="1.75" />
 															<span
-																class="hidden @sm:block whitespace-nowrap overflow-hidden text-ellipsis translate-y-[0.5px] mr-0.5"
+																class="hidden @xl:block whitespace-nowrap overflow-hidden text-ellipsis translate-y-[0.5px] mr-0.5"
 																>{$i18n.t('Code Interpreter')}</span
 															>
 														</button>
@@ -1231,6 +1189,47 @@
 									</div>
 
 									<div class="self-end flex space-x-1 mr-1 shrink-0">
+										{#if toolServers.length > 0}
+											<Tooltip
+												content={$i18n.t('{{COUNT}} Available Tool Servers', {
+													COUNT: toolServers.length
+												})}
+											>
+												<button
+													class="translate-y-[1.5px] flex gap-1 items-center text-gray-600 dark:text-gray-300 hover:text-gray-700 dark:hover:text-gray-200 rounded-lg px-1.5 py-0.5 mr-0.5 self-center border border-gray-100 dark:border-gray-800 transition"
+													aria-label="Available Tool Servers"
+													type="button"
+													on:click={() => {
+														showToolServers = !showToolServers;
+													}}
+												>
+													<svg
+														xmlns="http://www.w3.org/2000/svg"
+														fill="none"
+														viewBox="0 0 24 24"
+														stroke-width="1.5"
+														stroke="currentColor"
+														class="size-3"
+													>
+														<path
+															stroke-linecap="round"
+															stroke-linejoin="round"
+															d="M21.75 6.75a4.5 4.5 0 0 1-4.884 4.484c-1.076-.091-2.264.071-2.95.904l-7.152 8.684a2.548 2.548 0 1 1-3.586-3.586l8.684-7.152c.833-.686.995-1.874.904-2.95a4.5 4.5 0 0 1 6.336-4.486l-3.276 3.276a3.004 3.004 0 0 0 2.25 2.25l3.276-3.276c.256.565.398 1.192.398 1.852Z"
+														/>
+														<path
+															stroke-linecap="round"
+															stroke-linejoin="round"
+															d="M4.867 19.125h.008v.008h-.008v-.008Z"
+														/>
+													</svg>
+
+													<span class="text-xs">
+														{toolServers.length}
+													</span>
+												</button>
+											</Tooltip>
+										{/if}
+
 										{#if !history?.currentId || history.messages[history.currentId]?.done == true}
 											<Tooltip content={$i18n.t('Record voice')}>
 												<button

+ 1 - 1
src/lib/components/chat/MessageInput/Commands/Knowledge.svelte

@@ -210,7 +210,7 @@
 									{/if}
 
 									<div class="line-clamp-1">
-										{item?.name}
+										{decodeURIComponent(item?.name)}
 									</div>
 								</div>
 

+ 16 - 2
src/lib/components/chat/MessageInput/Commands/Prompts.svelte

@@ -1,5 +1,5 @@
 <script lang="ts">
-	import { prompts, user } from '$lib/stores';
+	import { prompts, settings, user } from '$lib/stores';
 	import {
 		findWordIndices,
 		getUserPosition,
@@ -120,7 +120,21 @@
 			text = text.replaceAll('{{CURRENT_WEEKDAY}}', weekday);
 		}
 
-		prompt = text;
+		const lines = prompt.split('\n');
+		const lastLine = lines.pop();
+
+		const lastLineWords = lastLine.split(' ');
+		const lastWord = lastLineWords.pop();
+
+		if ($settings?.richTextInput ?? true) {
+			lastLineWords.push(`${text.replace(/</g, '&lt;').replace(/>/g, '&gt;')}`);
+			lines.push(lastLineWords.join(' '));
+		} else {
+			lastLineWords.push(text);
+			lines.push(lastLineWords.join(' '));
+		}
+
+		prompt = lines.join('\n');
 
 		const chatInputContainerElement = document.getElementById('chat-input-container');
 		const chatInputElement = document.getElementById('chat-input');

+ 2 - 2
src/lib/components/chat/MessageInput/InputMenu.svelte

@@ -94,8 +94,8 @@
 
 	<div slot="content">
 		<DropdownMenu.Content
-			class="w-full max-w-[220px] rounded-xl px-1 py-1  border-gray-300/30 dark:border-gray-700/50 z-50 bg-white dark:bg-gray-850 dark:text-white shadow-sm"
-			sideOffset={15}
+			class="w-full max-w-[200px] rounded-xl px-1 py-1 border border-gray-300/30 dark:border-gray-700/50 z-50 bg-white dark:bg-gray-850 dark:text-white shadow-sm"
+			sideOffset={10}
 			alignOffset={-8}
 			side="top"
 			align="start"

+ 42 - 0
src/lib/components/chat/Messages.svelte

@@ -107,6 +107,47 @@
 		}
 	};
 
+	const gotoMessage = async (message, idx) => {
+		// Determine the correct sibling list (either parent's children or root messages)
+		let siblings;
+		if (message.parentId !== null) {
+			siblings = history.messages[message.parentId].childrenIds;
+		} else {
+			siblings = Object.values(history.messages)
+				.filter((msg) => msg.parentId === null)
+				.map((msg) => msg.id);
+		}
+
+		// Clamp index to a valid range
+		idx = Math.max(0, Math.min(idx, siblings.length - 1));
+
+		let messageId = siblings[idx];
+
+		// If we're navigating to a different message
+		if (message.id !== messageId) {
+			// Drill down to the deepest child of that branch
+			let messageChildrenIds = history.messages[messageId].childrenIds;
+			while (messageChildrenIds.length !== 0) {
+				messageId = messageChildrenIds.at(-1);
+				messageChildrenIds = history.messages[messageId].childrenIds;
+			}
+
+			history.currentId = messageId;
+		}
+
+		await tick();
+
+		// Optional auto-scroll
+		if ($settings?.scrollOnBranchChange ?? true) {
+			const element = document.getElementById('messages-container');
+			autoScroll = element.scrollHeight - element.scrollTop <= element.clientHeight + 50;
+
+			setTimeout(() => {
+				scrollToBottom();
+			}, 100);
+		}
+	};
+
 	const showPreviousMessage = async (message) => {
 		if (message.parentId !== null) {
 			let messageId =
@@ -408,6 +449,7 @@
 							messageId={message.id}
 							idx={messageIdx}
 							{user}
+							{gotoMessage}
 							{showPreviousMessage}
 							{showNextMessage}
 							{updateChat}

+ 6 - 6
src/lib/components/chat/Messages/Citations.svelte

@@ -102,7 +102,7 @@
 			<div class="flex text-xs font-medium flex-wrap">
 				{#each citations as citation, idx}
 					<button
-						id={`source-${id}-${idx}`}
+						id={`source-${id}-${idx + 1}`}
 						class="no-toggle outline-hidden flex dark:text-gray-300 p-1 bg-white dark:bg-gray-900 rounded-xl max-w-96"
 						on:click={() => {
 							showCitationModal = true;
@@ -117,14 +117,14 @@
 						<div
 							class="flex-1 mx-1 truncate text-black/60 hover:text-black dark:text-white/60 dark:hover:text-white transition"
 						>
-							{citation.source.name}
+							{decodeURIComponent(citation.source.name)}
 						</div>
 					</button>
 				{/each}
 			</div>
 		{:else}
 			<Collapsible
-				id="collapsible-sources"
+				id={`collapsible-${id}`}
 				bind:open={isCollapsibleOpen}
 				className="w-full max-w-full "
 				buttonClassName="w-fit max-w-full"
@@ -157,7 +157,7 @@
 											</div>
 										{/if}
 										<div class="flex-1 mx-1 truncate">
-											{citation.source.name}
+											{decodeURIComponent(citation.source.name)}
 										</div>
 									</button>
 								{/each}
@@ -181,7 +181,7 @@
 					<div class="flex text-xs font-medium flex-wrap">
 						{#each citations as citation, idx}
 							<button
-								id={`source-${id}-${idx}`}
+								id={`source-${id}-${idx + 1}`}
 								class="no-toggle outline-hidden flex dark:text-gray-300 p-1 bg-gray-50 hover:bg-gray-100 dark:bg-gray-900 dark:hover:bg-gray-850 transition rounded-xl max-w-96"
 								on:click={() => {
 									showCitationModal = true;
@@ -194,7 +194,7 @@
 									</div>
 								{/if}
 								<div class="flex-1 mx-1 truncate">
-									{citation.source.name}
+									{decodeURIComponent(citation.source.name)}
 								</div>
 							</button>
 						{/each}

+ 3 - 3
src/lib/components/chat/Messages/CitationsModal.svelte

@@ -98,7 +98,7 @@
 												: `#`}
 										target="_blank"
 									>
-										{document?.metadata?.name ?? document.source.name}
+										{decodeURIComponent(document?.metadata?.name ?? document.source.name)}
 									</a>
 									{#if document?.metadata?.page}
 										<span class="text-xs text-gray-500 dark:text-gray-400">
@@ -128,11 +128,11 @@
 													{percentage.toFixed(2)}%
 												</span>
 												<span class="text-gray-500 dark:text-gray-500">
-													({document.distance.toFixed(4)})
+													({(document?.distance ?? 0).toFixed(4)})
 												</span>
 											{:else}
 												<span class="text-gray-500 dark:text-gray-500">
-													{document.distance.toFixed(4)}
+													{(document?.distance ?? 0).toFixed(4)}
 												</span>
 											{/if}
 										</div>

+ 4 - 2
src/lib/components/chat/Messages/CodeBlock.svelte

@@ -27,6 +27,7 @@
 
 	export let save = false;
 	export let run = true;
+	export let collapsed = false;
 
 	export let token;
 	export let lang = '';
@@ -60,7 +61,6 @@
 	let result = null;
 	let files = null;
 
-	let collapsed = false;
 	let copied = false;
 	let saved = false;
 
@@ -441,7 +441,9 @@
 
 					{#if ($config?.features?.enable_code_execution ?? true) && (lang.toLowerCase() === 'python' || lang.toLowerCase() === 'py' || (lang === '' && checkPythonCode(code)))}
 						{#if executing}
-							<div class="run-code-button bg-none border-none p-1 cursor-not-allowed">Running</div>
+							<div class="run-code-button bg-none border-none p-1 cursor-not-allowed">
+								{$i18n.t('Running')}
+							</div>
 						{:else if run}
 							<button
 								class="flex gap-1 items-center run-code-button bg-none border-none bg-gray-50 hover:bg-gray-100 dark:bg-gray-850 dark:hover:bg-gray-800 transition rounded-md px-1.5 py-0.5"

+ 6 - 1
src/lib/components/chat/Messages/ContentRenderer.svelte

@@ -84,7 +84,12 @@
 		}
 
 		if (floatingButtonsElement) {
-			floatingButtonsElement.closeHandler();
+			// check if closeHandler is defined
+
+			if (typeof floatingButtonsElement?.closeHandler === 'function') {
+				// call the closeHandler function
+				floatingButtonsElement?.closeHandler();
+			}
 		}
 	};
 

+ 1 - 1
src/lib/components/chat/Messages/Markdown.svelte

@@ -11,7 +11,7 @@
 
 	const dispatch = createEventDispatcher();
 
-	export let id;
+	export let id = '';
 	export let content;
 	export let model = null;
 	export let save = false;

+ 108 - 0
src/lib/components/chat/Messages/Markdown/AlertRenderer.svelte

@@ -0,0 +1,108 @@
+<script lang="ts" context="module">
+	import { marked, type Token } from 'marked';
+
+	type AlertType = 'NOTE' | 'TIP' | 'IMPORTANT' | 'WARNING' | 'CAUTION';
+
+	interface AlertTheme {
+		border: string;
+		text: string;
+		icon: ComponentType;
+	}
+
+	export interface AlertData {
+		type: AlertType;
+		text: string;
+		tokens: Token[];
+	}
+
+	const alertStyles: Record<AlertType, AlertTheme> = {
+		NOTE: {
+			border: 'border-sky-500',
+			text: 'text-sky-500',
+			icon: Info
+		},
+		TIP: {
+			border: 'border-emerald-500',
+			text: 'text-emerald-500',
+			icon: LightBlub
+		},
+		IMPORTANT: {
+			border: 'border-purple-500',
+			text: 'text-purple-500',
+			icon: Star
+		},
+		WARNING: {
+			border: 'border-yellow-500',
+			text: 'text-yellow-500',
+			icon: ArrowRightCircle
+		},
+		CAUTION: {
+			border: 'border-rose-500',
+			text: 'text-rose-500',
+			icon: Bolt
+		}
+	};
+
+	/**
+	 * Detect a leading `[!NOTE]`, `[!TIP]`, `[!IMPORTANT]`, `[!WARNING]` or `[!CAUTION]`
+	 * marker in `token.text`.
+	 *
+	 * @param token - token whose `text` is inspected
+	 * @returns the alert type, the text with the marker removed, and that text lexed
+	 *          by `marked.lexer`; `false` when the text does not start with a marker
+	 */
+	export function alertComponent(token: Token): AlertData | false {
+		const markerPattern = new RegExp(`^(?:\\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION)\\])\\s*?\n*`);
+		const found = token.text?.match(markerPattern);
+
+		// No text or no leading marker: not an alert.
+		if (!found || !found.length) {
+			return false;
+		}
+
+		// Strip the marker so only the alert's content is lexed and rendered.
+		const body = token.text.replace(markerPattern, '');
+		return {
+			type: found[1] as AlertType,
+			text: body,
+			tokens: marked.lexer(body)
+		};
+	}
+</script>
+
+<script lang="ts">
+	import Info from '$lib/components/icons/Info.svelte';
+	import Star from '$lib/components/icons/Star.svelte';
+	import LightBlub from '$lib/components/icons/LightBlub.svelte';
+	import Bolt from '$lib/components/icons/Bolt.svelte';
+	import ArrowRightCircle from '$lib/components/icons/ArrowRightCircle.svelte';
+	import MarkdownTokens from './MarkdownTokens.svelte';
+	import type { ComponentType } from 'svelte';
+
+	export let token: Token;
+	export let alert: AlertData;
+	export let id = '';
+	export let tokenIdx = 0;
+	export let onTaskClick: ((event: MouseEvent) => void) | undefined = undefined;
+	export let onSourceClick: ((event: MouseEvent) => void) | undefined = undefined;
+</script>
+
+<!--
+
+Renders the following Markdown as alerts:
+
+> [!NOTE]
+> Example note
+
+> [!TIP]
+> Example tip
+
+> [!IMPORTANT]
+> Example important
+
+> [!CAUTION]
+> Example caution
+
+> [!WARNING]
+> Example warning
+
+-->
+<div class={`border-l-2 pl-2 ${alertStyles[alert.type].border}`}>
+	<p class={alertStyles[alert.type].text}>
+		<svelte:component this={alertStyles[alert.type].icon} className="inline-block size-4" />
+		<b>{alert.type}</b>
+	</p>
+	<MarkdownTokens id={`${id}-${tokenIdx}`} tokens={alert.tokens} {onTaskClick} {onSourceClick} />
+</div>

+ 1 - 1
src/lib/components/chat/Messages/Markdown/MarkdownInlineTokens.svelte

@@ -31,7 +31,7 @@
 		{:else if token.text.includes(`<source_id`)}
 			<Source {id} {token} onClick={onSourceClick} />
 		{:else}
-			{token.text}
+			{@html html}
 		{/if}
 	{:else if token.type === 'link'}
 		{#if token.tokens}

+ 15 - 5
src/lib/components/chat/Messages/Markdown/MarkdownTokens.svelte

@@ -14,10 +14,13 @@
 	import CodeBlock from '$lib/components/chat/Messages/CodeBlock.svelte';
 	import MarkdownInlineTokens from '$lib/components/chat/Messages/Markdown/MarkdownInlineTokens.svelte';
 	import KatexRenderer from './KatexRenderer.svelte';
+	import AlertRenderer, { alertComponent } from './AlertRenderer.svelte';
 	import Collapsible from '$lib/components/common/Collapsible.svelte';
 	import Tooltip from '$lib/components/common/Tooltip.svelte';
 	import ArrowDownTray from '$lib/components/icons/ArrowDownTray.svelte';
+
 	import Source from './Source.svelte';
+	import { settings } from '$lib/stores';
 
 	const dispatch = createEventDispatcher();
 
@@ -84,6 +87,7 @@
 		{#if token.raw.includes('```')}
 			<CodeBlock
 				id={`${id}-${tokenIdx}`}
+				collapsed={$settings?.collapseCodeBlocks ?? false}
 				{token}
 				lang={token?.lang ?? ''}
 				code={token?.text ?? ''}
@@ -119,7 +123,7 @@
 									class="px-3! py-1.5! cursor-pointer border border-gray-100 dark:border-gray-850"
 									style={token.align[headerIdx] ? '' : `text-align: ${token.align[headerIdx]}`}
 								>
-									<div class="flex flex-col gap-1.5 text-left">
+									<div class="gap-1.5 text-left">
 										<div class="shrink-0 break-normal">
 											<MarkdownInlineTokens
 												id={`${id}-${tokenIdx}-header-${headerIdx}`}
@@ -140,7 +144,7 @@
 										class="px-3! py-1.5! text-gray-900 dark:text-white w-max border border-gray-100 dark:border-gray-850"
 										style={token.align[cellIdx] ? '' : `text-align: ${token.align[cellIdx]}`}
 									>
-										<div class="flex flex-col break-normal">
+										<div class="break-normal">
 											<MarkdownInlineTokens
 												id={`${id}-${tokenIdx}-row-${rowIdx}-${cellIdx}`}
 												tokens={cell.tokens}
@@ -170,9 +174,14 @@
 			</div>
 		</div>
 	{:else if token.type === 'blockquote'}
-		<blockquote dir="auto">
-			<svelte:self id={`${id}-${tokenIdx}`} tokens={token.tokens} {onTaskClick} {onSourceClick} />
-		</blockquote>
+		{@const alert = alertComponent(token)}
+		{#if alert}
+			<!-- Forward id/tokenIdx and the click handlers so content nested in an alert gets the same ids and interactivity as a plain blockquote -->
+			<AlertRenderer {token} {alert} {id} {tokenIdx} {onTaskClick} {onSourceClick} />
+		{:else}
+			<blockquote dir="auto">
+				<svelte:self id={`${id}-${tokenIdx}`} tokens={token.tokens} {onTaskClick} {onSourceClick} />
+			</blockquote>
+		{/if}
 	{:else if token.type === 'list'}
 		{#if token.ordered}
 			<ol start={token.start || 1}>
@@ -242,6 +251,7 @@
 	{:else if token.type === 'details'}
 		<Collapsible
 			title={token.summary}
+			open={$settings?.expandDetails ?? false}
 			attributes={token?.attributes}
 			className="w-full space-y-1"
 			dir="auto"

+ 3 - 0
src/lib/components/chat/Messages/Message.svelte

@@ -20,6 +20,7 @@
 
 	export let user;
 
+	export let gotoMessage;
 	export let showPreviousMessage;
 	export let showNextMessage;
 	export let updateChat;
@@ -57,6 +58,7 @@
 					: (Object.values(history.messages)
 							.filter((message) => message.parentId === null)
 							.map((message) => message.id) ?? [])}
+				{gotoMessage}
 				{showPreviousMessage}
 				{showNextMessage}
 				{editMessage}
@@ -70,6 +72,7 @@
 				{messageId}
 				isLastMessage={messageId === history.currentId}
 				siblings={history.messages[history.messages[messageId].parentId]?.childrenIds ?? []}
+				{gotoMessage}
 				{showPreviousMessage}
 				{showNextMessage}
 				{updateChat}

+ 30 - 0
src/lib/components/chat/Messages/MultiResponseMessages.svelte

@@ -58,6 +58,35 @@
 		}
 	}
 
+	const gotoMessage = async (modelIdx, messageIdx) => {
+		// Clamp messageIdx to ensure it's within valid range
+		groupedMessageIdsIdx[modelIdx] = Math.max(
+			0,
+			Math.min(messageIdx, groupedMessageIds[modelIdx].messageIds.length - 1)
+		);
+
+		// Get the messageId at the specified index
+		let messageId = groupedMessageIds[modelIdx].messageIds[groupedMessageIdsIdx[modelIdx]];
+		console.log(messageId);
+
+		// Traverse the branch to find the deepest child message
+		let messageChildrenIds = history.messages[messageId].childrenIds;
+		while (messageChildrenIds.length !== 0) {
+			messageId = messageChildrenIds.at(-1);
+			messageChildrenIds = history.messages[messageId].childrenIds;
+		}
+
+		// Update the current message ID in history
+		history.currentId = messageId;
+
+		// Await UI updates
+		await tick();
+		await updateChat();
+
+		// Trigger scrolling after navigation
+		triggerScroll();
+	};
+
 	const showPreviousMessage = async (modelIdx) => {
 		groupedMessageIdsIdx[modelIdx] = Math.max(0, groupedMessageIdsIdx[modelIdx] - 1);
 
@@ -224,6 +253,7 @@
 									messageId={_messageId}
 									isLastMessage={true}
 									siblings={groupedMessageIds[modelIdx].messageIds}
+									gotoMessage={(message, messageIdx) => gotoMessage(modelIdx, messageIdx)}
 									showPreviousMessage={() => showPreviousMessage(modelIdx)}
 									showNextMessage={() => showNextMessage(modelIdx)}
 									{updateChat}

+ 54 - 10
src/lib/components/chat/Messages/ResponseMessage.svelte

@@ -5,7 +5,7 @@
 	import { createEventDispatcher } from 'svelte';
 	import { onMount, tick, getContext } from 'svelte';
 	import type { Writable } from 'svelte/store';
-	import type { i18n as i18nType } from 'i18next';
+	import type { i18n as i18nType, t } from 'i18next';
 
 	const i18n = getContext<Writable<i18nType>>('i18n');
 
@@ -110,6 +110,7 @@
 
 	export let siblings;
 
+	export let gotoMessage: Function = () => {};
 	export let showPreviousMessage: Function;
 	export let showNextMessage: Function;
 
@@ -139,6 +140,8 @@
 	let editedContent = '';
 	let editTextAreaElement: HTMLTextAreaElement;
 
+	let messageIndexEdit = false;
+
 	let audioParts: Record<number, HTMLAudioElement | null> = {};
 	let speaking = false;
 	let speakingIdx: number | undefined;
@@ -559,7 +562,7 @@
 		<div class="flex-auto w-0 pl-1">
 			<Name>
 				<Tooltip content={model?.name ?? message.model} placement="top-start">
-					<span class="line-clamp-1">
+					<span class="line-clamp-1 text-black dark:text-white">
 						{model?.name ?? message.model}
 					</span>
 				</Tooltip>
@@ -739,7 +742,7 @@
 										{history}
 										content={message.content}
 										sources={message.sources}
-										floatingButtons={message?.done}
+										floatingButtons={message?.done && !readOnly}
 										save={!readOnly}
 										{model}
 										onTaskClick={async (e) => {
@@ -748,7 +751,9 @@
 										onSourceClick={async (id, idx) => {
 											console.log(id, idx);
 											let sourceButton = document.getElementById(`source-${message.id}-${idx}`);
-											const sourcesCollapsible = document.getElementById(`collapsible-sources`);
+											const sourcesCollapsible = document.getElementById(
+												`collapsible-${message.id}`
+											);
 
 											if (sourceButton) {
 												sourceButton.click();
@@ -844,11 +849,50 @@
 										</svg>
 									</button>
 
-									<div
-										class="text-sm tracking-widest font-semibold self-center dark:text-gray-100 min-w-fit"
-									>
-										{siblings.indexOf(message.id) + 1}/{siblings.length}
-									</div>
+									{#if messageIndexEdit}
+										<div
+											class="text-sm flex justify-center font-semibold self-center dark:text-gray-100 min-w-fit"
+										>
+											<input
+												id="message-index-input-{message.id}"
+												type="number"
+												value={siblings.indexOf(message.id) + 1}
+												min="1"
+												max={siblings.length}
+												on:focus={(e) => {
+													e.target.select();
+												}}
+												on:blur={(e) => {
+													gotoMessage(message, e.target.value - 1);
+													messageIndexEdit = false;
+												}}
+												on:keydown={(e) => {
+													if (e.key === 'Enter') {
+														gotoMessage(message, e.target.value - 1);
+														messageIndexEdit = false;
+													}
+												}}
+												class="bg-transparent font-semibold self-center dark:text-gray-100 min-w-fit outline-hidden"
+											/>/{siblings.length}
+										</div>
+									{:else}
+										<!-- svelte-ignore a11y-no-static-element-interactions -->
+										<div
+											class="text-sm tracking-widest font-semibold self-center dark:text-gray-100 min-w-fit"
+											on:dblclick={async () => {
+												messageIndexEdit = true;
+
+												await tick();
+												const input = document.getElementById(`message-index-input-${message.id}`);
+												if (input) {
+													input.focus();
+													input.select();
+												}
+											}}
+										>
+											{siblings.indexOf(message.id) + 1}/{siblings.length}
+										</div>
+									{/if}
 
 									<button
 										class="self-center p-1 hover:bg-black/5 dark:hover:bg-white/5 dark:hover:text-white hover:text-black rounded-md transition"
@@ -1269,7 +1313,7 @@
 										<Tooltip content={$i18n.t('Delete')} placement="bottom">
 											<button
 												type="button"
-												id="continue-response-button"
+												id="delete-response-button"
 												class="{isLastMessage
 													? 'visible'
 													: 'invisible group-hover:visible'} p-1.5 hover:bg-black/5 dark:hover:bg-white/5 rounded-lg dark:hover:text-white hover:text-black transition regenerate-response-button"

+ 96 - 11
src/lib/components/chat/Messages/UserMessage.svelte

@@ -27,6 +27,7 @@
 
 	export let siblings;
 
+	export let gotoMessage: Function;
 	export let showPreviousMessage: Function;
 	export let showNextMessage: Function;
 
@@ -38,6 +39,8 @@
 
 	let showDeleteConfirm = false;
 
+	let messageIndexEdit = false;
+
 	let edit = false;
 	let editedContent = '';
 	let messageEditTextAreaElement: HTMLTextAreaElement;
@@ -267,11 +270,52 @@
 											</svg>
 										</button>
 
-										<div
-											class="text-sm tracking-widest font-semibold self-center dark:text-gray-100"
-										>
-											{siblings.indexOf(message.id) + 1}/{siblings.length}
-										</div>
+										{#if messageIndexEdit}
+											<div
+												class="text-sm flex justify-center font-semibold self-center dark:text-gray-100 min-w-fit"
+											>
+												<input
+													id="message-index-input-{message.id}"
+													type="number"
+													value={siblings.indexOf(message.id) + 1}
+													min="1"
+													max={siblings.length}
+													on:focus={(e) => {
+														e.target.select();
+													}}
+													on:blur={(e) => {
+														gotoMessage(message, e.target.value - 1);
+														messageIndexEdit = false;
+													}}
+													on:keydown={(e) => {
+														if (e.key === 'Enter') {
+															gotoMessage(message, e.target.value - 1);
+															messageIndexEdit = false;
+														}
+													}}
+													class="bg-transparent font-semibold self-center dark:text-gray-100 min-w-fit outline-hidden"
+												/>/{siblings.length}
+											</div>
+										{:else}
+											<!-- svelte-ignore a11y-no-static-element-interactions -->
+											<div
+												class="text-sm tracking-widest font-semibold self-center dark:text-gray-100 min-w-fit"
+												on:dblclick={async () => {
+													messageIndexEdit = true;
+
+													await tick();
+													const input = document.getElementById(
+														`message-index-input-${message.id}`
+													);
+													if (input) {
+														input.focus();
+														input.select();
+													}
+												}}
+											>
+												{siblings.indexOf(message.id) + 1}/{siblings.length}
+											</div>
+										{/if}
 
 										<button
 											class="self-center p-1 hover:bg-black/5 dark:hover:bg-white/5 dark:hover:text-white hover:text-black rounded-md transition"
@@ -347,7 +391,7 @@
 								</button>
 							</Tooltip>
 
-							{#if !isFirstMessage && !readOnly}
+							{#if !readOnly && (!isFirstMessage || siblings.length > 1)}
 								<Tooltip content={$i18n.t('Delete')} placement="bottom">
 									<button
 										class="invisible group-hover:visible p-1 rounded-sm dark:hover:text-white hover:text-black transition"
@@ -398,11 +442,52 @@
 											</svg>
 										</button>
 
-										<div
-											class="text-sm tracking-widest font-semibold self-center dark:text-gray-100"
-										>
-											{siblings.indexOf(message.id) + 1}/{siblings.length}
-										</div>
+										{#if messageIndexEdit}
+											<div
+												class="text-sm flex justify-center font-semibold self-center dark:text-gray-100 min-w-fit"
+											>
+												<input
+													id="message-index-input-{message.id}"
+													type="number"
+													value={siblings.indexOf(message.id) + 1}
+													min="1"
+													max={siblings.length}
+													on:focus={(e) => {
+														e.target.select();
+													}}
+													on:blur={(e) => {
+														gotoMessage(message, e.target.value - 1);
+														messageIndexEdit = false;
+													}}
+													on:keydown={(e) => {
+														if (e.key === 'Enter') {
+															gotoMessage(message, e.target.value - 1);
+															messageIndexEdit = false;
+														}
+													}}
+													class="bg-transparent font-semibold self-center dark:text-gray-100 min-w-fit outline-hidden"
+												/>/{siblings.length}
+											</div>
+										{:else}
+											<!-- svelte-ignore a11y-no-static-element-interactions -->
+											<div
+												class="text-sm tracking-widest font-semibold self-center dark:text-gray-100 min-w-fit"
+												on:dblclick={async () => {
+													messageIndexEdit = true;
+
+													await tick();
+													const input = document.getElementById(
+														`message-index-input-${message.id}`
+													);
+													if (input) {
+														input.focus();
+														input.select();
+													}
+												}}
+											>
+												{siblings.indexOf(message.id) + 1}/{siblings.length}
+											</div>
+										{/if}
 
 										<button
 											class="self-center p-1 hover:bg-black/5 dark:hover:bg-white/5 dark:hover:text-white hover:text-black rounded-md transition"

+ 2 - 1
src/lib/components/chat/ModelSelector.svelte

@@ -46,7 +46,8 @@
 							model: model
 						}))}
 						showTemporaryChatControl={$user.role === 'user'
-							? ($user?.permissions?.chat?.temporary ?? true)
+							? ($user?.permissions?.chat?.temporary ?? true) &&
+								!($user?.permissions?.chat?.temporary_enforced ?? false)
 							: true}
 						bind:value={selectedModel}
 					/>

+ 148 - 42
src/lib/components/chat/ModelSelector/Selector.svelte

@@ -61,10 +61,11 @@
 	$: selectedModel = items.find((item) => item.value === value) ?? '';
 
 	let searchValue = '';
+
 	let selectedTag = '';
+	let selectedConnectionType = '';
 
 	let ollamaVersion = null;
-
 	let selectedModelIdx = 0;
 
 	const fuse = new Fuse(
@@ -72,7 +73,7 @@
 			const _item = {
 				...item,
 				modelName: item.model?.name,
-				tags: item.model?.info?.meta?.tags?.map((tag) => tag.name).join(' '),
+				tags: (item.model?.tags ?? []).map((tag) => tag.name).join(' '),
 				desc: item.model?.info?.meta?.description
 			};
 			return _item;
@@ -93,14 +94,61 @@
 					if (selectedTag === '') {
 						return true;
 					}
-					return item.model?.info?.meta?.tags?.map((tag) => tag.name).includes(selectedTag);
+					return (item.model?.tags ?? []).map((tag) => tag.name).includes(selectedTag);
 				})
-		: items.filter((item) => {
-				if (selectedTag === '') {
-					return true;
-				}
-				return item.model?.info?.meta?.tags?.map((tag) => tag.name).includes(selectedTag);
-			});
+				.filter((item) => {
+					if (selectedConnectionType === '') {
+						return true;
+					} else if (selectedConnectionType === 'ollama') {
+						return item.model?.owned_by === 'ollama';
+					} else if (selectedConnectionType === 'openai') {
+						return item.model?.owned_by === 'openai';
+					} else if (selectedConnectionType === 'direct') {
+						return item.model?.direct;
+					}
+				})
+		: items
+				.filter((item) => {
+					if (selectedTag === '') {
+						return true;
+					}
+					return (item.model?.tags ?? []).map((tag) => tag.name).includes(selectedTag);
+				})
+				.filter((item) => {
+					if (selectedConnectionType === '') {
+						return true;
+					} else if (selectedConnectionType === 'ollama') {
+						return item.model?.owned_by === 'ollama';
+					} else if (selectedConnectionType === 'openai') {
+						return item.model?.owned_by === 'openai';
+					} else if (selectedConnectionType === 'direct') {
+						return item.model?.direct;
+					}
+				});
+
+	// Re-sync the highlighted row whenever the tag or connection-type filter
+	// changes. The two variables are referenced only so that the reactive
+	// statement depends on them; the view is reset regardless of their values.
+	$: {
+		selectedTag;
+		selectedConnectionType;
+		resetView();
+	}
+
+	/**
+	 * Point the keyboard highlight at the currently selected model (or at the
+	 * first row when that model is filtered out) and scroll it into view.
+	 */
+	const resetView = async () => {
+		await tick();
+
+		// The list is rendered from the non-hidden subset of filteredItems and the
+		// highlight compares selectedModelIdx with that rendered index, so the
+		// lookup has to run over the same subset to stay aligned with the rows.
+		const visibleItems = filteredItems.filter(
+			(item) => !(item.model?.info?.meta?.hidden ?? false)
+		);
+		const selectedInVisible = visibleItems.findIndex((item) => item.value === value);
+
+		if (selectedInVisible >= 0) {
+			// The selected model is visible in the current filter
+			selectedModelIdx = selectedInVisible;
+		} else {
+			// The selected model is not visible, default to first item in filtered list
+			selectedModelIdx = 0;
+		}
+
+		// Wait for the highlight attribute to be applied before scrolling to it
+		await tick();
+		const item = document.querySelector(`[data-arrow-selected="true"]`);
+		item?.scrollIntoView({ block: 'center', inline: 'nearest', behavior: 'instant' });
+	};
 
 	const pullModelHandler = async () => {
 		const sanitizedModelTag = searchValue.trim().replace(/^ollama\s+(run|pull)\s+/, '');
@@ -234,7 +282,7 @@
 		ollamaVersion = await getOllamaVersion(localStorage.token).catch((error) => false);
 
 		if (items) {
-			tags = items.flatMap((item) => item.model?.info?.meta?.tags ?? []).map((tag) => tag.name);
+			tags = items.flatMap((item) => item.model?.tags ?? []).map((tag) => tag.name);
 
 			// Remove duplicates and sort
 			tags = Array.from(new Set(tags)).sort((a, b) => a.localeCompare(b));
@@ -262,8 +310,9 @@
 	bind:open={show}
 	onOpenChange={async () => {
 		searchValue = '';
-		selectedModelIdx = 0;
 		window.setTimeout(() => document.getElementById('model-search-input')?.focus(), 0);
+
+		resetView();
 	}}
 	closeFocus={false}
 >
@@ -325,29 +374,79 @@
 			{/if}
 
 			<div class="px-3 mb-2 max-h-64 overflow-y-auto scrollbar-hidden group relative">
-				{#if tags}
-					<div class=" flex w-full sticky">
+				{#if tags && items.filter((item) => !(item.model?.info?.meta?.hidden ?? false)).length > 0}
+					<div
+						class=" flex w-full sticky top-0 z-10 bg-white dark:bg-gray-850 overflow-x-auto scrollbar-none"
+						on:wheel={(e) => {
+							if (e.deltaY !== 0) {
+								e.preventDefault();
+								e.currentTarget.scrollLeft += e.deltaY;
+							}
+						}}
+					>
 						<div
-							class="flex gap-1 scrollbar-none overflow-x-auto w-fit text-center text-sm font-medium rounded-full bg-transparent px-1.5 pb-0.5"
+							class="flex gap-1 w-fit text-center text-sm font-medium rounded-full bg-transparent px-1.5 pb-0.5"
 							bind:this={tagsContainerElement}
 						>
 							<button
-								class="min-w-fit outline-none p-1.5 {selectedTag === ''
+								class="min-w-fit outline-none p-1.5 {selectedTag === '' &&
+								selectedConnectionType === ''
 									? ''
 									: 'text-gray-300 dark:text-gray-600 hover:text-gray-700 dark:hover:text-white'} transition capitalize"
 								on:click={() => {
+									selectedConnectionType = '';
 									selectedTag = '';
 								}}
 							>
 								{$i18n.t('All')}
 							</button>
 
+							{#if items.find((item) => item.model?.owned_by === 'ollama') && items.find((item) => item.model?.owned_by === 'openai')}
+								<button
+									class="min-w-fit outline-none p-1.5 {selectedConnectionType === 'ollama'
+										? ''
+										: 'text-gray-300 dark:text-gray-600 hover:text-gray-700 dark:hover:text-white'} transition capitalize"
+									on:click={() => {
+										selectedTag = '';
+										selectedConnectionType = 'ollama';
+									}}
+								>
+									{$i18n.t('Local')}
+								</button>
+								<button
+									class="min-w-fit outline-none p-1.5 {selectedConnectionType === 'openai'
+										? ''
+										: 'text-gray-300 dark:text-gray-600 hover:text-gray-700 dark:hover:text-white'} transition capitalize"
+									on:click={() => {
+										selectedTag = '';
+										selectedConnectionType = 'openai';
+									}}
+								>
+									{$i18n.t('External')}
+								</button>
+							{/if}
+
+							{#if items.find((item) => item.model?.direct)}
+								<button
+									class="min-w-fit outline-none p-1.5 {selectedConnectionType === 'direct'
+										? ''
+										: 'text-gray-300 dark:text-gray-600 hover:text-gray-700 dark:hover:text-white'} transition capitalize"
+									on:click={() => {
+										selectedTag = '';
+										selectedConnectionType = 'direct';
+									}}
+								>
+									{$i18n.t('Direct')}
+								</button>
+							{/if}
+
 							{#each tags as tag}
 								<button
 									class="min-w-fit outline-none p-1.5 {selectedTag === tag
 										? ''
 										: 'text-gray-300 dark:text-gray-600 hover:text-gray-700 dark:hover:text-white'} transition capitalize"
 									on:click={() => {
+										selectedConnectionType = '';
 										selectedTag = tag;
 									}}
 								>
@@ -358,7 +457,7 @@
 					</div>
 				{/if}
 
-				{#each filteredItems as item, index}
+				{#each filteredItems.filter((item) => !(item.model?.info?.meta?.hidden ?? false)) as item, index}
 					<button
 						aria-label="model-item"
 						class="flex w-full text-left font-medium line-clamp-1 select-none items-center rounded-button py-2 pl-3 pr-1.5 text-sm text-gray-700 dark:text-gray-100 outline-hidden transition-all duration-75 hover:bg-gray-100 dark:hover:bg-gray-800 rounded-lg cursor-pointer data-highlighted:bg-muted {index ===
@@ -366,6 +465,7 @@
 							? 'bg-gray-100 dark:bg-gray-800 group-hover:bg-transparent'
 							: ''}"
 						data-arrow-selected={index === selectedModelIdx}
+						data-value={item.value}
 						on:click={() => {
 							value = item.value;
 							selectedModelIdx = index;
@@ -374,9 +474,9 @@
 						}}
 					>
 						<div class="flex flex-col">
-							{#if $mobile && (item?.model?.info?.meta?.tags ?? []).length > 0}
+							{#if $mobile && (item?.model?.tags ?? []).length > 0}
 								<div class="flex gap-0.5 self-start h-full mb-1.5 -translate-x-1">
-									{#each item.model?.info?.meta.tags as tag}
+									{#each item.model?.tags.sort((a, b) => a.name.localeCompare(b.name)) as tag}
 										<div
 											class=" text-xs font-bold px-1 rounded-sm uppercase line-clamp-1 bg-gray-500/20 text-gray-700 dark:text-gray-200"
 										>
@@ -398,31 +498,37 @@
 													alt="Model"
 													class="rounded-full size-5 flex items-center mr-2"
 												/>
-												{item.label}
+
+												<div class="flex items-center line-clamp-1">
+													<div class="line-clamp-1">
+														{item.label}
+													</div>
+
+													{#if item.model.owned_by === 'ollama' && (item.model.ollama?.details?.parameter_size ?? '') !== ''}
+														<div class="flex ml-1 items-center translate-y-[0.5px]">
+															<Tooltip
+																content={`${
+																	item.model.ollama?.details?.quantization_level
+																		? item.model.ollama?.details?.quantization_level + ' '
+																		: ''
+																}${
+																	item.model.ollama?.size
+																		? `(${(item.model.ollama?.size / 1024 ** 3).toFixed(1)}GB)`
+																		: ''
+																}`}
+																className="self-end"
+															>
+																<span
+																	class=" text-xs font-medium text-gray-600 dark:text-gray-400 line-clamp-1"
+																	>{item.model.ollama?.details?.parameter_size ?? ''}</span
+																>
+															</Tooltip>
+														</div>
+													{/if}
+												</div>
 											</Tooltip>
 										</div>
 									</div>
-									{#if item.model.owned_by === 'ollama' && (item.model.ollama?.details?.parameter_size ?? '') !== ''}
-										<div class="flex ml-1 items-center translate-y-[0.5px]">
-											<Tooltip
-												content={`${
-													item.model.ollama?.details?.quantization_level
-														? item.model.ollama?.details?.quantization_level + ' '
-														: ''
-												}${
-													item.model.ollama?.size
-														? `(${(item.model.ollama?.size / 1024 ** 3).toFixed(1)}GB)`
-														: ''
-												}`}
-												className="self-end"
-											>
-												<span
-													class=" text-xs font-medium text-gray-600 dark:text-gray-400 line-clamp-1"
-													>{item.model.ollama?.details?.parameter_size ?? ''}</span
-												>
-											</Tooltip>
-										</div>
-									{/if}
 								</div>
 
 								<!-- {JSON.stringify(item.info)} -->
@@ -496,11 +602,11 @@
 									</Tooltip>
 								{/if}
 
-								{#if !$mobile && (item?.model?.info?.meta?.tags ?? []).length > 0}
+								{#if !$mobile && (item?.model?.tags ?? []).length > 0}
 									<div
 										class="flex gap-0.5 self-center items-center h-full translate-y-[0.5px] overflow-x-auto scrollbar-none"
 									>
-										{#each item.model?.info?.meta.tags as tag}
+										{#each item.model?.tags.sort((a, b) => a.name.localeCompare(b.name)) as tag}
 											<Tooltip content={tag.name} className="flex-shrink-0">
 												<div
 													class=" text-xs font-bold px-1 rounded-sm uppercase bg-gray-500/20 text-gray-700 dark:text-gray-200"

+ 13 - 29
src/lib/components/chat/Navbar.svelte

@@ -114,37 +114,21 @@
 							</div>
 						</button>
 					</Menu>
-				{:else if $mobile && ($user.role === 'admin' || $user?.permissions?.chat?.controls)}
-					<Tooltip content={$i18n.t('Controls')}>
-						<button
-							class=" flex cursor-pointer px-2 py-2 rounded-xl hover:bg-gray-50 dark:hover:bg-gray-850 transition"
-							on:click={async () => {
-								await showControls.set(!$showControls);
-							}}
-							aria-label="Controls"
-						>
-							<div class=" m-auto self-center">
-								<AdjustmentsHorizontal className=" size-5" strokeWidth="0.5" />
-							</div>
-						</button>
-					</Tooltip>
 				{/if}
 
-				{#if !$mobile && ($user.role === 'admin' || $user?.permissions?.chat?.controls)}
-					<Tooltip content={$i18n.t('Controls')}>
-						<button
-							class=" flex cursor-pointer px-2 py-2 rounded-xl hover:bg-gray-50 dark:hover:bg-gray-850 transition"
-							on:click={async () => {
-								await showControls.set(!$showControls);
-							}}
-							aria-label="Controls"
-						>
-							<div class=" m-auto self-center">
-								<AdjustmentsHorizontal className=" size-5" strokeWidth="0.5" />
-							</div>
-						</button>
-					</Tooltip>
-				{/if}
+				<Tooltip content={$i18n.t('Controls')}>
+					<button
+						class=" flex cursor-pointer px-2 py-2 rounded-xl hover:bg-gray-50 dark:hover:bg-gray-850 transition"
+						on:click={async () => {
+							await showControls.set(!$showControls);
+						}}
+						aria-label="Controls"
+					>
+						<div class=" m-auto self-center">
+							<AdjustmentsHorizontal className=" size-5" strokeWidth="0.5" />
+						</div>
+					</button>
+				</Tooltip>
 
 				<Tooltip content={$i18n.t('New Chat')}>
 					<button

+ 3 - 0
src/lib/components/chat/Placeholder.svelte

@@ -38,6 +38,8 @@
 	export let codeInterpreterEnabled = false;
 	export let webSearchEnabled = false;
 
+	export let toolServers = [];
+
 	let models = [];
 
 	const selectSuggestionPrompt = async (p) => {
@@ -196,6 +198,7 @@
 					bind:codeInterpreterEnabled
 					bind:webSearchEnabled
 					bind:atSelectedModel
+					{toolServers}
 					{transparentBackground}
 					{stopResponse}
 					{createMessagePair}

+ 15 - 13
src/lib/components/chat/Settings/Account.svelte

@@ -245,21 +245,23 @@
 				</div>
 			</div>
 
-			<div class="pt-2">
-				<div class="flex flex-col w-full">
-					<div class=" mb-1 text-xs font-medium">{$i18n.t('Notification Webhook')}</div>
-
-					<div class="flex-1">
-						<input
-							class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-hidden"
-							type="url"
-							placeholder={$i18n.t('Enter your webhook URL')}
-							bind:value={webhookUrl}
-							required
-						/>
+			{#if $config?.features?.enable_user_webhooks}
+				<div class="pt-2">
+					<div class="flex flex-col w-full">
+						<div class=" mb-1 text-xs font-medium">{$i18n.t('Notification Webhook')}</div>
+
+						<div class="flex-1">
+							<input
+								class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-hidden"
+								type="url"
+								placeholder={$i18n.t('Enter your webhook URL')}
+								bind:value={webhookUrl}
+								required
+							/>
+						</div>
 					</div>
 				</div>
-			</div>
+			{/if}
 		</div>
 
 		<div class="py-0.5">

+ 1 - 0
src/lib/components/chat/Settings/Advanced/AdvancedParams.svelte

@@ -961,6 +961,7 @@
 			<div class="flex w-full justify-between">
 				<div class=" self-center text-xs font-medium">
 					{$i18n.t('Context Length')}
+					{$i18n.t('(Ollama)')}
 				</div>
 
 				<button

+ 13 - 1
src/lib/components/chat/Settings/Connections/Connection.svelte

@@ -6,6 +6,7 @@
 	import SensitiveInput from '$lib/components/common/SensitiveInput.svelte';
 	import Cog6 from '$lib/components/icons/Cog6.svelte';
 	import AddConnectionModal from '$lib/components/AddConnectionModal.svelte';
+	import ConfirmDialog from '$lib/components/common/ConfirmDialog.svelte';
 
 	export let onDelete = () => {};
 	export let onSubmit = () => {};
@@ -17,6 +18,7 @@
 	export let config = {};
 
 	let showConfigModal = false;
+	let showDeleteConfirmDialog = false;
 </script>
 
 <AddConnectionModal
@@ -28,7 +30,9 @@
 		key,
 		config
 	}}
-	{onDelete}
+	onDelete={() => {
+		showDeleteConfirmDialog = true;
+	}}
 	onSubmit={(connection) => {
 		url = connection.url;
 		key = connection.key;
@@ -37,6 +41,14 @@
 	}}
 />
 
+<ConfirmDialog
+	bind:show={showDeleteConfirmDialog}
+	on:confirm={() => {
+		onDelete();
+		showConfigModal = false;
+	}}
+/>
+
 <div class="flex w-full gap-2 items-center">
 	<Tooltip
 		className="w-full relative"

+ 87 - 48
src/lib/components/chat/Settings/General.svelte

@@ -9,6 +9,7 @@
 	const i18n = getContext('i18n');
 
 	import AdvancedParams from './Advanced/AdvancedParams.svelte';
+	import Textarea from '$lib/components/common/Textarea.svelte';
 
 	export let saveSettings: Function;
 	export let getModels: Function;
@@ -40,7 +41,7 @@
 	};
 
 	// Advanced
-	let requestFormat = '';
+	let requestFormat = null;
 	let keepAlive: string | null = null;
 
 	let params = {
@@ -70,14 +71,74 @@
 		num_gpu: null
 	};
 
+	// Returns true only when `json` parses to a non-null object (arrays included);
+	// primitives, `null` and unparsable text all yield false.
+	const validateJSON = (json) => {
+		let parsed;
+		try {
+			parsed = JSON.parse(json);
+		} catch (err) {
+			return false;
+		}
+		return Boolean(parsed) && typeof parsed === 'object';
+	};
+
 	const toggleRequestFormat = async () => {
-		if (requestFormat === '') {
+		if (requestFormat === null) {
 			requestFormat = 'json';
 		} else {
-			requestFormat = '';
+			requestFormat = null;
 		}
 
-		saveSettings({ requestFormat: requestFormat !== '' ? requestFormat : undefined });
+		saveSettings({ requestFormat: requestFormat !== null ? requestFormat : undefined });
+	};
+
+	/**
+	 * Validate the request format, persist the general and advanced settings,
+	 * and emit the `save` event.
+	 *
+	 * `requestFormat` is null (default), the literal 'json', or JSON schema text
+	 * from the textarea. Schema text is parsed into an object before saving and
+	 * converted back to text afterwards so it stays editable. Saving is aborted
+	 * with an error toast when the schema text is not valid JSON.
+	 */
+	const saveHandler = async () => {
+		if (requestFormat !== null && requestFormat !== 'json') {
+			if (validateJSON(requestFormat) === false) {
+				toast.error($i18n.t('Invalid JSON schema'));
+				return;
+			} else {
+				requestFormat = JSON.parse(requestFormat);
+			}
+		}
+
+		// Unset (null) values are sent as undefined so they are left out of the saved settings
+		saveSettings({
+			system: system !== '' ? system : undefined,
+			params: {
+				stream_response: params.stream_response !== null ? params.stream_response : undefined,
+				function_calling: params.function_calling !== null ? params.function_calling : undefined,
+				seed: (params.seed !== null ? params.seed : undefined) ?? undefined,
+				stop: params.stop ? params.stop.split(',').filter((e) => e) : undefined,
+				temperature: params.temperature !== null ? params.temperature : undefined,
+				reasoning_effort: params.reasoning_effort !== null ? params.reasoning_effort : undefined,
+				logit_bias: params.logit_bias !== null ? params.logit_bias : undefined,
+				frequency_penalty: params.frequency_penalty !== null ? params.frequency_penalty : undefined,
+				// Each penalty is saved from its own field
+				presence_penalty: params.presence_penalty !== null ? params.presence_penalty : undefined,
+				repeat_penalty: params.repeat_penalty !== null ? params.repeat_penalty : undefined,
+				repeat_last_n: params.repeat_last_n !== null ? params.repeat_last_n : undefined,
+				mirostat: params.mirostat !== null ? params.mirostat : undefined,
+				mirostat_eta: params.mirostat_eta !== null ? params.mirostat_eta : undefined,
+				mirostat_tau: params.mirostat_tau !== null ? params.mirostat_tau : undefined,
+				top_k: params.top_k !== null ? params.top_k : undefined,
+				top_p: params.top_p !== null ? params.top_p : undefined,
+				min_p: params.min_p !== null ? params.min_p : undefined,
+				tfs_z: params.tfs_z !== null ? params.tfs_z : undefined,
+				num_ctx: params.num_ctx !== null ? params.num_ctx : undefined,
+				num_batch: params.num_batch !== null ? params.num_batch : undefined,
+				num_keep: params.num_keep !== null ? params.num_keep : undefined,
+				max_tokens: params.max_tokens !== null ? params.max_tokens : undefined,
+				use_mmap: params.use_mmap !== null ? params.use_mmap : undefined,
+				use_mlock: params.use_mlock !== null ? params.use_mlock : undefined,
+				num_thread: params.num_thread !== null ? params.num_thread : undefined,
+				num_gpu: params.num_gpu !== null ? params.num_gpu : undefined
+			},
+			keepAlive: keepAlive ? (isNaN(keepAlive) ? keepAlive : parseInt(keepAlive)) : undefined,
+			requestFormat: requestFormat !== null ? requestFormat : undefined
+		});
+		dispatch('save');
+
+		// Convert a parsed schema back to text for the textarea. `typeof null` is
+		// also 'object', so null is excluded explicitly; otherwise the default
+		// mode would be replaced by the string "null" after every save.
+		requestFormat =
+			requestFormat !== null && typeof requestFormat === 'object'
+				? JSON.stringify(requestFormat, null, 2)
+				: requestFormat;
+	};
 
 	onMount(async () => {
@@ -88,7 +149,12 @@
 		notificationEnabled = $settings.notificationEnabled ?? false;
 		system = $settings.system ?? '';
 
-		requestFormat = $settings.requestFormat ?? '';
+		requestFormat = $settings.requestFormat ?? null;
+		if (requestFormat !== null && requestFormat !== 'json') {
+			requestFormat =
+				typeof requestFormat === 'object' ? JSON.stringify(requestFormat, null, 2) : requestFormat;
+		}
+
 		keepAlive = $settings.keepAlive ?? null;
 
 		params = { ...params, ...$settings.params };
@@ -270,7 +336,7 @@
 					<AdvancedParams admin={$user?.role === 'admin'} bind:params />
 					<hr class=" border-gray-100 dark:border-gray-850" />
 
-					<div class=" py-1 w-full justify-between">
+					<div class=" w-full justify-between">
 						<div class="flex w-full justify-between">
 							<div class=" self-center text-xs font-medium">{$i18n.t('Keep Alive')}</div>
 
@@ -302,8 +368,8 @@
 					</div>
 
 					<div>
-						<div class=" py-1 flex w-full justify-between">
-							<div class=" self-center text-sm font-medium">{$i18n.t('Request Mode')}</div>
+						<div class=" flex w-full justify-between">
+							<div class=" self-center text-xs font-medium">{$i18n.t('Request Mode')}</div>
 
 							<button
 								class="p-1 px-3 text-xs flex rounded-sm transition"
@@ -311,9 +377,9 @@
 									toggleRequestFormat();
 								}}
 							>
-								{#if requestFormat === ''}
+								{#if requestFormat === null}
 									<span class="ml-2 self-center"> {$i18n.t('Default')} </span>
-								{:else if requestFormat === 'json'}
+								{:else}
 									<!-- <svg
                             xmlns="http://www.w3.org/2000/svg"
                             viewBox="0 0 20 20"
@@ -328,6 +394,16 @@
 								{/if}
 							</button>
 						</div>
+
+						{#if requestFormat !== null}
+							<div class="flex mt-1 space-x-2">
+								<Textarea
+									className="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-hidden"
+									placeholder={$i18n.t('e.g. "json" or a JSON schema')}
+									bind:value={requestFormat}
+								/>
+							</div>
+						{/if}
 					</div>
 				{/if}
 			</div>
@@ -338,44 +414,7 @@
 		<button
 			class="px-3.5 py-1.5 text-sm font-medium bg-black hover:bg-gray-900 text-white dark:bg-white dark:text-black dark:hover:bg-gray-100 transition rounded-full"
 			on:click={() => {
-				saveSettings({
-					system: system !== '' ? system : undefined,
-					params: {
-						stream_response: params.stream_response !== null ? params.stream_response : undefined,
-						function_calling:
-							params.function_calling !== null ? params.function_calling : undefined,
-						seed: (params.seed !== null ? params.seed : undefined) ?? undefined,
-						stop: params.stop ? params.stop.split(',').filter((e) => e) : undefined,
-						temperature: params.temperature !== null ? params.temperature : undefined,
-						reasoning_effort:
-							params.reasoning_effort !== null ? params.reasoning_effort : undefined,
-						logit_bias: params.logit_bias !== null ? params.logit_bias : undefined,
-						frequency_penalty:
-							params.frequency_penalty !== null ? params.frequency_penalty : undefined,
-						presence_penalty:
-							params.frequency_penalty !== null ? params.frequency_penalty : undefined,
-						repeat_penalty:
-							params.frequency_penalty !== null ? params.frequency_penalty : undefined,
-						repeat_last_n: params.repeat_last_n !== null ? params.repeat_last_n : undefined,
-						mirostat: params.mirostat !== null ? params.mirostat : undefined,
-						mirostat_eta: params.mirostat_eta !== null ? params.mirostat_eta : undefined,
-						mirostat_tau: params.mirostat_tau !== null ? params.mirostat_tau : undefined,
-						top_k: params.top_k !== null ? params.top_k : undefined,
-						top_p: params.top_p !== null ? params.top_p : undefined,
-						min_p: params.min_p !== null ? params.min_p : undefined,
-						tfs_z: params.tfs_z !== null ? params.tfs_z : undefined,
-						num_ctx: params.num_ctx !== null ? params.num_ctx : undefined,
-						num_batch: params.num_batch !== null ? params.num_batch : undefined,
-						num_keep: params.num_keep !== null ? params.num_keep : undefined,
-						max_tokens: params.max_tokens !== null ? params.max_tokens : undefined,
-						use_mmap: params.use_mmap !== null ? params.use_mmap : undefined,
-						use_mlock: params.use_mlock !== null ? params.use_mlock : undefined,
-						num_thread: params.num_thread !== null ? params.num_thread : undefined,
-						num_gpu: params.num_gpu !== null ? params.num_gpu : undefined
-					},
-					keepAlive: keepAlive ? (isNaN(keepAlive) ? keepAlive : parseInt(keepAlive)) : undefined
-				});
-				dispatch('save');
+				saveHandler();
 			}}
 		>
 			{$i18n.t('Save')}

+ 90 - 1
src/lib/components/chat/Settings/Interface.svelte

@@ -30,15 +30,21 @@
 	// Interface
 	let defaultModelId = '';
 	let showUsername = false;
+	let notificationSound = true;
+
 	let richTextInput = true;
+	let promptAutocomplete = false;
+
 	let largeTextAsFile = false;
-	let notificationSound = true;
 
 	let landingPageMode = '';
 	let chatBubble = true;
 	let chatDirection: 'LTR' | 'RTL' = 'LTR';
 	let ctrlEnterToSend = false;
 
+	let collapseCodeBlocks = false;
+	let expandDetails = false;
+
 	let imageCompression = false;
 	let imageCompressionSize = {
 		width: '',
@@ -55,11 +61,26 @@
 
 	let webSearch = null;
 
+	// Flip the "Always Expand Details" preference and persist the new value.
+	const toggleExpandDetails = () => {
+		const next = !expandDetails;
+		expandDetails = next;
+		saveSettings({ expandDetails: next });
+	};
+
+	// Flip the "Always Collapse Code Blocks" preference and persist the new value.
+	const toggleCollapseCodeBlocks = () => {
+		const next = !collapseCodeBlocks;
+		collapseCodeBlocks = next;
+		saveSettings({ collapseCodeBlocks: next });
+	};
+
 	const toggleSplitLargeChunks = async () => {
 		splitLargeChunks = !splitLargeChunks;
 		saveSettings({ splitLargeChunks: splitLargeChunks });
 	};
 
+	// Flip the "Prompt Autocompletion" preference and persist the new value.
+	const togglePromptAutocomplete = async () => {
+		const enabled = !promptAutocomplete;
+		promptAutocomplete = enabled;
+		saveSettings({ promptAutocomplete: enabled });
+	};
+
 	const togglesScrollOnBranchChange = async () => {
 		scrollOnBranchChange = !scrollOnBranchChange;
 		saveSettings({ scrollOnBranchChange: scrollOnBranchChange });
@@ -225,8 +246,12 @@
 		voiceInterruption = $settings.voiceInterruption ?? false;
 
 		richTextInput = $settings.richTextInput ?? true;
+		promptAutocomplete = $settings.promptAutocomplete ?? false;
 		largeTextAsFile = $settings.largeTextAsFile ?? false;
 
+		collapseCodeBlocks = $settings.collapseCodeBlocks ?? false;
+		expandDetails = $settings.expandDetails ?? false;
+
 		landingPageMode = $settings.landingPageMode ?? '';
 		chatBubble = $settings.chatBubble ?? true;
 		widescreenMode = $settings.widescreenMode ?? false;
@@ -548,6 +573,30 @@
 				</div>
 			</div>
 
+			{#if $config?.features?.enable_autocomplete_generation && richTextInput}
+				<div>
+					<div class=" py-0.5 flex w-full justify-between">
+						<div class=" self-center text-xs">
+							{$i18n.t('Prompt Autocompletion')}
+						</div>
+
+						<button
+							class="p-1 px-3 text-xs flex rounded-sm transition"
+							on:click={() => {
+								togglePromptAutocomplete();
+							}}
+							type="button"
+						>
+							{#if promptAutocomplete === true}
+								<span class="ml-2 self-center">{$i18n.t('On')}</span>
+							{:else}
+								<span class="ml-2 self-center">{$i18n.t('Off')}</span>
+							{/if}
+						</button>
+					</div>
+				</div>
+			{/if}
+
 			<div>
 				<div class=" py-0.5 flex w-full justify-between">
 					<div class=" self-center text-xs">
@@ -570,6 +619,46 @@
 				</div>
 			</div>
 
+			<div>
+				<div class=" py-0.5 flex w-full justify-between">
+					<div class=" self-center text-xs">{$i18n.t('Always Collapse Code Blocks')}</div>
+
+					<button
+						class="p-1 px-3 text-xs flex rounded-sm transition"
+						on:click={() => {
+							toggleCollapseCodeBlocks();
+						}}
+						type="button"
+					>
+						{#if collapseCodeBlocks === true}
+							<span class="ml-2 self-center">{$i18n.t('On')}</span>
+						{:else}
+							<span class="ml-2 self-center">{$i18n.t('Off')}</span>
+						{/if}
+					</button>
+				</div>
+			</div>
+
+			<div>
+				<div class=" py-0.5 flex w-full justify-between">
+					<div class=" self-center text-xs">{$i18n.t('Always Expand Details')}</div>
+
+					<button
+						class="p-1 px-3 text-xs flex rounded-sm transition"
+						on:click={() => {
+							toggleExpandDetails();
+						}}
+						type="button"
+					>
+						{#if expandDetails === true}
+							<span class="ml-2 self-center">{$i18n.t('On')}</span>
+						{:else}
+							<span class="ml-2 self-center">{$i18n.t('Off')}</span>
+						{/if}
+					</button>
+				</div>
+			</div>
+
 			<div>
 				<div class=" py-0.5 flex w-full justify-between">
 					<div class=" self-center text-xs">

Some files were not shown because too many files changed in this diff