|
@@ -138,6 +138,7 @@ class TTSConfigForm(BaseModel):
|
|
VOICE: str
|
|
VOICE: str
|
|
SPLIT_ON: str
|
|
SPLIT_ON: str
|
|
AZURE_SPEECH_REGION: str
|
|
AZURE_SPEECH_REGION: str
|
|
|
|
+ AZURE_SPEECH_BASE_URL: str
|
|
AZURE_SPEECH_OUTPUT_FORMAT: str
|
|
AZURE_SPEECH_OUTPUT_FORMAT: str
|
|
|
|
|
|
|
|
|
|
@@ -172,6 +173,7 @@ async def get_audio_config(request: Request, user=Depends(get_admin_user)):
|
|
"VOICE": request.app.state.config.TTS_VOICE,
|
|
"VOICE": request.app.state.config.TTS_VOICE,
|
|
"SPLIT_ON": request.app.state.config.TTS_SPLIT_ON,
|
|
"SPLIT_ON": request.app.state.config.TTS_SPLIT_ON,
|
|
"AZURE_SPEECH_REGION": request.app.state.config.TTS_AZURE_SPEECH_REGION,
|
|
"AZURE_SPEECH_REGION": request.app.state.config.TTS_AZURE_SPEECH_REGION,
|
|
|
|
+ "AZURE_SPEECH_BASE_URL": request.app.state.config.TTS_AZURE_SPEECH_BASE_URL,
|
|
"AZURE_SPEECH_OUTPUT_FORMAT": request.app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT,
|
|
"AZURE_SPEECH_OUTPUT_FORMAT": request.app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT,
|
|
},
|
|
},
|
|
"stt": {
|
|
"stt": {
|
|
@@ -202,6 +204,9 @@ async def update_audio_config(
|
|
request.app.state.config.TTS_VOICE = form_data.tts.VOICE
|
|
request.app.state.config.TTS_VOICE = form_data.tts.VOICE
|
|
request.app.state.config.TTS_SPLIT_ON = form_data.tts.SPLIT_ON
|
|
request.app.state.config.TTS_SPLIT_ON = form_data.tts.SPLIT_ON
|
|
request.app.state.config.TTS_AZURE_SPEECH_REGION = form_data.tts.AZURE_SPEECH_REGION
|
|
request.app.state.config.TTS_AZURE_SPEECH_REGION = form_data.tts.AZURE_SPEECH_REGION
|
|
|
|
+ request.app.state.config.TTS_AZURE_SPEECH_BASE_URL = (
|
|
|
|
+ form_data.tts.AZURE_SPEECH_BASE_URL
|
|
|
|
+ )
|
|
request.app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT = (
|
|
request.app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT = (
|
|
form_data.tts.AZURE_SPEECH_OUTPUT_FORMAT
|
|
form_data.tts.AZURE_SPEECH_OUTPUT_FORMAT
|
|
)
|
|
)
|
|
@@ -235,6 +240,7 @@ async def update_audio_config(
|
|
"VOICE": request.app.state.config.TTS_VOICE,
|
|
"VOICE": request.app.state.config.TTS_VOICE,
|
|
"SPLIT_ON": request.app.state.config.TTS_SPLIT_ON,
|
|
"SPLIT_ON": request.app.state.config.TTS_SPLIT_ON,
|
|
"AZURE_SPEECH_REGION": request.app.state.config.TTS_AZURE_SPEECH_REGION,
|
|
"AZURE_SPEECH_REGION": request.app.state.config.TTS_AZURE_SPEECH_REGION,
|
|
|
|
+ "AZURE_SPEECH_BASE_URL": request.app.state.config.TTS_AZURE_SPEECH_BASE_URL,
|
|
"AZURE_SPEECH_OUTPUT_FORMAT": request.app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT,
|
|
"AZURE_SPEECH_OUTPUT_FORMAT": request.app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT,
|
|
},
|
|
},
|
|
"stt": {
|
|
"stt": {
|
|
@@ -406,7 +412,8 @@ async def speech(request: Request, user=Depends(get_verified_user)):
|
|
log.exception(e)
|
|
log.exception(e)
|
|
raise HTTPException(status_code=400, detail="Invalid JSON payload")
|
|
raise HTTPException(status_code=400, detail="Invalid JSON payload")
|
|
|
|
|
|
- region = request.app.state.config.TTS_AZURE_SPEECH_REGION
|
|
|
|
|
|
+ region = request.app.state.config.TTS_AZURE_SPEECH_REGION or "eastus"
|
|
|
|
+ base_url = request.app.state.config.TTS_AZURE_SPEECH_BASE_URL
|
|
language = request.app.state.config.TTS_VOICE
|
|
language = request.app.state.config.TTS_VOICE
|
|
locale = "-".join(request.app.state.config.TTS_VOICE.split("-")[:1])
|
|
locale = "-".join(request.app.state.config.TTS_VOICE.split("-")[:1])
|
|
output_format = request.app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT
|
|
output_format = request.app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT
|
|
@@ -420,7 +427,8 @@ async def speech(request: Request, user=Depends(get_verified_user)):
|
|
timeout=timeout, trust_env=True
|
|
timeout=timeout, trust_env=True
|
|
) as session:
|
|
) as session:
|
|
async with session.post(
|
|
async with session.post(
|
|
- f"https://{region}.tts.speech.microsoft.com/cognitiveservices/v1",
|
|
|
|
|
|
+ (base_url or f"https://{region}.tts.speech.microsoft.com")
|
|
|
|
+ + "/cognitiveservices/v1",
|
|
headers={
|
|
headers={
|
|
"Ocp-Apim-Subscription-Key": request.app.state.config.TTS_API_KEY,
|
|
"Ocp-Apim-Subscription-Key": request.app.state.config.TTS_API_KEY,
|
|
"Content-Type": "application/ssml+xml",
|
|
"Content-Type": "application/ssml+xml",
|
|
@@ -651,10 +659,10 @@ def transcribe(request: Request, file_path):
|
|
)
|
|
)
|
|
|
|
|
|
api_key = request.app.state.config.AUDIO_STT_AZURE_API_KEY
|
|
api_key = request.app.state.config.AUDIO_STT_AZURE_API_KEY
|
|
- region = request.app.state.config.AUDIO_STT_AZURE_REGION
|
|
|
|
|
|
+ region = request.app.state.config.AUDIO_STT_AZURE_REGION or "eastus"
|
|
locales = request.app.state.config.AUDIO_STT_AZURE_LOCALES
|
|
locales = request.app.state.config.AUDIO_STT_AZURE_LOCALES
|
|
base_url = request.app.state.config.AUDIO_STT_AZURE_BASE_URL
|
|
base_url = request.app.state.config.AUDIO_STT_AZURE_BASE_URL
|
|
- max_speakers = request.app.state.config.AUDIO_STT_AZURE_MAX_SPEAKERS
|
|
|
|
|
|
+ max_speakers = request.app.state.config.AUDIO_STT_AZURE_MAX_SPEAKERS or 3
|
|
|
|
|
|
# IF NO LOCALES, USE DEFAULTS
|
|
# IF NO LOCALES, USE DEFAULTS
|
|
if len(locales) < 2:
|
|
if len(locales) < 2:
|
|
@@ -681,12 +689,6 @@ def transcribe(request: Request, file_path):
|
|
detail="Azure API key is required for Azure STT",
|
|
detail="Azure API key is required for Azure STT",
|
|
)
|
|
)
|
|
|
|
|
|
- if not base_url and not region:
|
|
|
|
- raise HTTPException(
|
|
|
|
- status_code=400,
|
|
|
|
- detail="Azure region or base url is required for Azure STT",
|
|
|
|
- )
|
|
|
|
-
|
|
|
|
r = None
|
|
r = None
|
|
try:
|
|
try:
|
|
# Prepare the request
|
|
# Prepare the request
|
|
@@ -702,9 +704,8 @@ def transcribe(request: Request, file_path):
|
|
}
|
|
}
|
|
|
|
|
|
url = (
|
|
url = (
|
|
- base_url
|
|
|
|
- or f"https://{region}.api.cognitive.microsoft.com/speechtotext/transcriptions:transcribe?api-version=2024-11-15"
|
|
|
|
- )
|
|
|
|
|
|
+ base_url or f"https://{region}.api.cognitive.microsoft.com"
|
|
|
|
+ ) + "/speechtotext/transcriptions:transcribe?api-version=2024-11-15"
|
|
|
|
|
|
# Use context manager to ensure file is properly closed
|
|
# Use context manager to ensure file is properly closed
|
|
with open(file_path, "rb") as audio_file:
|
|
with open(file_path, "rb") as audio_file:
|
|
@@ -939,7 +940,10 @@ def get_available_voices(request) -> dict:
|
|
elif request.app.state.config.TTS_ENGINE == "azure":
|
|
elif request.app.state.config.TTS_ENGINE == "azure":
|
|
try:
|
|
try:
|
|
region = request.app.state.config.TTS_AZURE_SPEECH_REGION
|
|
region = request.app.state.config.TTS_AZURE_SPEECH_REGION
|
|
- url = f"https://{region}.tts.speech.microsoft.com/cognitiveservices/voices/list"
|
|
|
|
|
|
+ base_url = request.app.state.config.TTS_AZURE_SPEECH_BASE_URL
|
|
|
|
+ url = (
|
|
|
|
+ base_url or f"https://{region}.tts.speech.microsoft.com"
|
|
|
|
+ ) + "/cognitiveservices/voices/list"
|
|
headers = {
|
|
headers = {
|
|
"Ocp-Apim-Subscription-Key": request.app.state.config.TTS_API_KEY
|
|
"Ocp-Apim-Subscription-Key": request.app.state.config.TTS_API_KEY
|
|
}
|
|
}
|