Browse Source

Fix S3 allowed characters in Tags.

In Amazon S3 storage, only the following characters are allowed in tags: "letters (a-z, A-Z), numbers (0-9), and spaces representable in UTF-8, and the following characters: + - = . _ : / @". Added a sanitizer function that strips disallowed characters from tag keys and values before the put request.
Suleiman Elkhoury 1 month ago
parent
commit
e10d6ad79c
1 changed files with 15 additions and 1 deletions
  1. 15 1
      backend/open_webui/storage/provider.py

+ 15 - 1
backend/open_webui/storage/provider.py

@@ -2,6 +2,7 @@ import os
 import shutil
 import shutil
 import json
 import json
 import logging
 import logging
+import re
 from abc import ABC, abstractmethod
 from abc import ABC, abstractmethod
 from typing import BinaryIO, Tuple, Dict
 from typing import BinaryIO, Tuple, Dict
 
 
@@ -136,6 +137,11 @@ class S3StorageProvider(StorageProvider):
         self.bucket_name = S3_BUCKET_NAME
         self.bucket_name = S3_BUCKET_NAME
         self.key_prefix = S3_KEY_PREFIX if S3_KEY_PREFIX else ""
         self.key_prefix = S3_KEY_PREFIX if S3_KEY_PREFIX else ""
 
 
+    @staticmethod
+    def sanitize_tag_value(s: str) -> str:
+        """Only include S3 allowed characters."""
+        return re.sub(r"[^a-zA-Z0-9 äöüÄÖÜß\+\-=\._:/@]", "", s)
+
     def upload_file(
     def upload_file(
         self, file: BinaryIO, filename: str, tags: Dict[str, str]
         self, file: BinaryIO, filename: str, tags: Dict[str, str]
     ) -> Tuple[bytes, str]:
     ) -> Tuple[bytes, str]:
@@ -145,7 +151,15 @@ class S3StorageProvider(StorageProvider):
         try:
         try:
             self.s3_client.upload_file(file_path, self.bucket_name, s3_key)
             self.s3_client.upload_file(file_path, self.bucket_name, s3_key)
             if S3_ENABLE_TAGGING and tags:
             if S3_ENABLE_TAGGING and tags:
-                tagging = {"TagSet": [{"Key": k, "Value": v} for k, v in tags.items()]}
+                sanitized_tags = {
+                    self.sanitize_tag_value(k): self.sanitize_tag_value(v)
+                    for k, v in tags.items()
+                }
+                tagging = {
+                    "TagSet": [
+                        {"Key": k, "Value": v} for k, v in sanitized_tags.items()
+                    ]
+                }
                 self.s3_client.put_object_tagging(
                 self.s3_client.put_object_tagging(
                     Bucket=self.bucket_name,
                     Bucket=self.bucket_name,
                     Key=s3_key,
                     Key=s3_key,