|
@@ -33,7 +33,7 @@ from open_webui.config import (
|
|
|
WHISPER_MODEL_AUTO_UPDATE,
|
|
|
WHISPER_MODEL_DIR,
|
|
|
CACHE_DIR,
|
|
|
- WHISPER_LANGUAGE
|
|
|
+ WHISPER_LANGUAGE,
|
|
|
)
|
|
|
|
|
|
from open_webui.constants import ERROR_MESSAGES
|
|
@@ -154,6 +154,7 @@ class STTConfigForm(BaseModel):
|
|
|
AZURE_BASE_URL: str
|
|
|
AZURE_MAX_SPEAKERS: str
|
|
|
|
|
|
+
|
|
|
class AudioConfigUpdateForm(BaseModel):
|
|
|
tts: TTSConfigForm
|
|
|
stt: STTConfigForm
|
|
@@ -184,7 +185,7 @@ async def get_audio_config(request: Request, user=Depends(get_admin_user)):
|
|
|
"AZURE_REGION": request.app.state.config.AUDIO_STT_AZURE_REGION,
|
|
|
"AZURE_LOCALES": request.app.state.config.AUDIO_STT_AZURE_LOCALES,
|
|
|
"AZURE_BASE_URL": request.app.state.config.AUDIO_STT_AZURE_BASE_URL,
|
|
|
- "AZURE_MAX_SPEAKERS": request.app.state.config.AUDIO_STT_AZURE_MAX_SPEAKERS,
|
|
|
+ "AZURE_MAX_SPEAKERS": request.app.state.config.AUDIO_STT_AZURE_MAX_SPEAKERS,
|
|
|
},
|
|
|
}
|
|
|
|
|
@@ -215,7 +216,9 @@ async def update_audio_config(
|
|
|
request.app.state.config.AUDIO_STT_AZURE_REGION = form_data.stt.AZURE_REGION
|
|
|
request.app.state.config.AUDIO_STT_AZURE_LOCALES = form_data.stt.AZURE_LOCALES
|
|
|
request.app.state.config.AUDIO_STT_AZURE_BASE_URL = form_data.stt.AZURE_BASE_URL
|
|
|
- request.app.state.config.AUDIO_STT_AZURE_MAX_SPEAKERS = form_data.stt.AZURE_MAX_SPEAKERS
|
|
|
+ request.app.state.config.AUDIO_STT_AZURE_MAX_SPEAKERS = (
|
|
|
+ form_data.stt.AZURE_MAX_SPEAKERS
|
|
|
+ )
|
|
|
|
|
|
if request.app.state.config.STT_ENGINE == "":
|
|
|
request.app.state.faster_whisper_model = set_faster_whisper_model(
|
|
@@ -245,7 +248,7 @@ async def update_audio_config(
|
|
|
"AZURE_REGION": request.app.state.config.AUDIO_STT_AZURE_REGION,
|
|
|
"AZURE_LOCALES": request.app.state.config.AUDIO_STT_AZURE_LOCALES,
|
|
|
"AZURE_BASE_URL": request.app.state.config.AUDIO_STT_AZURE_BASE_URL,
|
|
|
- "AZURE_MAX_SPEAKERS": request.app.state.config.AUDIO_STT_AZURE_MAX_SPEAKERS,
|
|
|
+ "AZURE_MAX_SPEAKERS": request.app.state.config.AUDIO_STT_AZURE_MAX_SPEAKERS,
|
|
|
},
|
|
|
}
|
|
|
|
|
@@ -509,7 +512,7 @@ def transcribe(request: Request, file_path):
|
|
|
file_path,
|
|
|
beam_size=5,
|
|
|
vad_filter=request.app.state.config.WHISPER_VAD_FILTER,
|
|
|
- language=WHISPER_LANGUAGE
|
|
|
+ language=WHISPER_LANGUAGE,
|
|
|
)
|
|
|
log.info(
|
|
|
"Detected language '%s' with probability %f"
|
|
@@ -698,7 +701,10 @@ def transcribe(request: Request, file_path):
|
|
|
)
|
|
|
}
|
|
|
|
|
|
- url = base_url or f"https://{region}.api.cognitive.microsoft.com/speechtotext/transcriptions:transcribe?api-version=2024-11-15"
|
|
|
+ url = (
|
|
|
+ base_url
|
|
|
+ or f"https://{region}.api.cognitive.microsoft.com/speechtotext/transcriptions:transcribe?api-version=2024-11-15"
|
|
|
+ )
|
|
|
|
|
|
# Use context manager to ensure file is properly closed
|
|
|
with open(file_path, "rb") as audio_file:
|