Timothy Jaeryang Baek 2 weken geleden
bovenliggende
commit
e7332fd6fe
3 gewijzigde bestanden met toevoegingen van 15 en 1 verwijderingen
  1. 8 0
      backend/open_webui/config.py
  2. 2 0
      backend/open_webui/main.py
  3. 5 1
      backend/open_webui/routers/audio.py

+ 8 - 0
backend/open_webui/config.py

@@ -2489,6 +2489,13 @@ WHISPER_MODEL_AUTO_UPDATE = (
     and os.environ.get("WHISPER_MODEL_AUTO_UPDATE", "").lower() == "true"
 )
 
+WHISPER_VAD_FILTER = PersistentConfig(
+    "WHISPER_VAD_FILTER",
+    "audio.stt.whisper_vad_filter",
+    os.getenv("WHISPER_VAD_FILTER", "False").lower() == "true",
+)
+
+
 # Add Deepgram configuration
 DEEPGRAM_API_KEY = PersistentConfig(
     "DEEPGRAM_API_KEY",
@@ -2496,6 +2503,7 @@ DEEPGRAM_API_KEY = PersistentConfig(
     os.getenv("DEEPGRAM_API_KEY", ""),
 )
 
+
 AUDIO_STT_OPENAI_API_BASE_URL = PersistentConfig(
     "AUDIO_STT_OPENAI_API_BASE_URL",
     "audio.stt.openai.api_base_url",

+ 2 - 0
backend/open_webui/main.py

@@ -166,6 +166,7 @@ from open_webui.config import (
     FIRECRAWL_API_KEY,
     WEB_LOADER_ENGINE,
     WHISPER_MODEL,
+    WHISPER_VAD_FILTER,
     DEEPGRAM_API_KEY,
     WHISPER_MODEL_AUTO_UPDATE,
     WHISPER_MODEL_DIR,
@@ -789,6 +790,7 @@ app.state.config.STT_ENGINE = AUDIO_STT_ENGINE
 app.state.config.STT_MODEL = AUDIO_STT_MODEL
 
 app.state.config.WHISPER_MODEL = WHISPER_MODEL
+app.state.config.WHISPER_VAD_FILTER = WHISPER_VAD_FILTER
 app.state.config.DEEPGRAM_API_KEY = DEEPGRAM_API_KEY
 
 app.state.config.AUDIO_STT_AZURE_API_KEY = AUDIO_STT_AZURE_API_KEY

+ 5 - 1
backend/open_webui/routers/audio.py

@@ -497,7 +497,11 @@ def transcribe(request: Request, file_path):
             )
 
         model = request.app.state.faster_whisper_model
-        segments, info = model.transcribe(file_path, beam_size=5, vad_filter=True)
+        segments, info = model.transcribe(
+            file_path,
+            beam_size=5,
+            vad_filter=request.app.state.config.WHISPER_VAD_FILTER,
+        )
         log.info(
             "Detected language '%s' with probability %f"
             % (info.language, info.language_probability)