Audio.svelte 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757
  1. <script lang="ts">
  2. import { toast } from 'svelte-sonner';
  3. import { createEventDispatcher, onMount, getContext } from 'svelte';
  4. const dispatch = createEventDispatcher();
  5. import { getBackendConfig } from '$lib/apis';
  6. import {
  7. getAudioConfig,
  8. updateAudioConfig,
  9. getModels as _getModels,
  10. getVoices as _getVoices
  11. } from '$lib/apis/audio';
  12. import { config, settings } from '$lib/stores';
  13. import SensitiveInput from '$lib/components/common/SensitiveInput.svelte';
  14. import { TTS_RESPONSE_SPLIT } from '$lib/types';
  15. import type { Writable } from 'svelte/store';
  16. import type { i18n as i18nType } from 'i18next';
  // i18next store looked up from Svelte context under the 'i18n' key.
  const i18n = getContext<Writable<i18nType>>('i18n');

  // Parent-supplied callback; this component calls it after a config update returns a truthy result.
  export let saveHandler: () => void;

  // ---------------------------------------------------------------------
  // Text-to-Speech form state (mirrors the `tts` section of the audio config)
  // ---------------------------------------------------------------------
  let TTS_ENGINE = ''; // '' is the browser "Web API" option in the engine <select>
  let TTS_MODEL = '';
  let TTS_VOICE = '';
  let TTS_SPLIT_ON: TTS_RESPONSE_SPLIT = TTS_RESPONSE_SPLIT.PUNCTUATION;

  // Generic key field; the template binds it for both the ElevenLabs and Azure engines.
  let TTS_API_KEY = '';

  // OpenAI-specific TTS settings.
  let TTS_OPENAI_API_BASE_URL = '';
  let TTS_OPENAI_API_KEY = '';

  // Azure-specific TTS settings.
  let TTS_AZURE_SPEECH_REGION = '';
  let TTS_AZURE_SPEECH_BASE_URL = '';
  let TTS_AZURE_SPEECH_OUTPUT_FORMAT = '';

  // ---------------------------------------------------------------------
  // Speech-to-Text form state (mirrors the `stt` section of the audio config)
  // ---------------------------------------------------------------------
  let STT_ENGINE = ''; // '' is the "Whisper (Local)" option in the engine <select>
  let STT_MODEL = '';

  // Comma-separated text as typed by the user; split into a list when saving.
  let STT_SUPPORTED_CONTENT_TYPES = '';

  // Local Whisper settings; the loading flag disables the download button while a save is running.
  let STT_WHISPER_MODEL = '';
  let STT_WHISPER_MODEL_LOADING = false;

  // OpenAI-specific STT settings.
  let STT_OPENAI_API_BASE_URL = '';
  let STT_OPENAI_API_KEY = '';

  // Deepgram-specific STT settings.
  let STT_DEEPGRAM_API_KEY = '';

  // Azure-specific STT settings.
  let STT_AZURE_API_KEY = '';
  let STT_AZURE_REGION = '';
  let STT_AZURE_LOCALES = '';
  let STT_AZURE_BASE_URL = '';
  let STT_AZURE_MAX_SPEAKERS = '';

  // ---------------------------------------------------------------------
  // Option lists rendered by the TTS voice / model pickers
  // ---------------------------------------------------------------------
  // eslint-disable-next-line no-undef
  let voices: SpeechSynthesisVoice[] = [];
  let models: Awaited<ReturnType<typeof _getModels>>['models'] = [];
  /**
   * Reload `models` for the currently selected TTS engine.
   *
   * With the browser Web API engine (`TTS_ENGINE === ''`) the list is simply
   * emptied. For any other engine the list is requested from the backend; a
   * failed request is reported through a toast and leaves `models` unchanged.
   */
  const getModels = async () => {
    if (TTS_ENGINE === '') {
      models = [];
      return;
    }

    const token = localStorage.token;
    // Only forward the user's direct connections when that feature is enabled.
    const directConnections =
      $config?.features?.enable_direct_connections && ($settings?.directConnections ?? null);

    const response = await _getModels(token, directConnections).catch((err) => {
      toast.error(`${err}`);
    });

    if (response) {
      console.log(response);
      models = response.models;
    }
  };
  // Handle of the browser-voice polling timer that is currently running, if any.
  let voicesPollTimer: ReturnType<typeof setInterval> | undefined;

  // Poll every 100 ms and give up after 50 tries (about 5 seconds) so a browser
  // that never reports any voices does not keep a timer alive forever.
  const VOICES_POLL_INTERVAL_MS = 100;
  const VOICES_POLL_MAX_ATTEMPTS = 50;

  /** Cancel the browser-voice polling timer if one is active. */
  const stopVoicesPolling = () => {
    if (voicesPollTimer !== undefined) {
      clearInterval(voicesPollTimer);
      voicesPollTimer = undefined;
    }
  };

  /**
   * Reload `voices` for the currently selected TTS engine.
   *
   * - Web API engine (`TTS_ENGINE === ''`): browser voices may not be ready on
   *   the first call to `speechSynthesis.getVoices()`, so the list is polled
   *   until it is non-empty, the attempt cap is reached, or the engine changes.
   * - Any other engine: the list is requested from the backend; a failed
   *   request is reported through a toast and leaves `voices` unchanged.
   *
   * In both cases the resulting list is sorted by name using the active UI language.
   * Calling this again always cancels a poll started by an earlier call.
   */
  const getVoices = async () => {
    // Never let polls from earlier calls stack up or outlive an engine switch.
    stopVoicesPolling();

    if (TTS_ENGINE === '') {
      // Speech synthesis is not available in every environment.
      if (typeof speechSynthesis === 'undefined') {
        voices = [];
        return;
      }

      let attempts = 0;
      voicesPollTimer = setInterval(() => {
        attempts += 1;

        // The user picked another engine meanwhile: do not overwrite its voices.
        if (TTS_ENGINE !== '') {
          stopVoicesPolling();
          return;
        }

        const available = speechSynthesis.getVoices();
        if (available.length > 0) {
          stopVoicesPolling();
          voices = [...available].sort((a, b) =>
            a.name.localeCompare(b.name, $i18n.resolvedLanguage)
          );
        } else {
          voices = available;
          if (attempts >= VOICES_POLL_MAX_ATTEMPTS) {
            stopVoicesPolling();
          }
        }
      }, VOICES_POLL_INTERVAL_MS);
    } else {
      const res = await _getVoices(localStorage.token).catch((e) => {
        toast.error(`${e}`);
      });

      if (res) {
        console.log(res);
        voices = [...(res.voices ?? [])].sort((a, b) =>
          a.name.localeCompare(b.name, $i18n.resolvedLanguage)
        );
      }
    }
  };
  /**
   * Send the current form state to the backend as the new audio config.
   *
   * When the update returns a truthy result, the parent's `saveHandler` is
   * called and the global `config` store is refreshed from the backend.
   * Errors thrown by the API calls propagate to the caller.
   */
  const updateConfigHandler = async () => {
    // Turn the comma-separated text into a clean list: trim each entry and drop
    // empties, so a blank field yields [] (not ['']) and "audio/wav, audio/mpeg"
    // does not produce an entry with a leading space.
    const supportedContentTypes = STT_SUPPORTED_CONTENT_TYPES.split(',')
      .map((contentType) => contentType.trim())
      .filter((contentType) => contentType !== '');

    const res = await updateAudioConfig(localStorage.token, {
      tts: {
        OPENAI_API_BASE_URL: TTS_OPENAI_API_BASE_URL,
        OPENAI_API_KEY: TTS_OPENAI_API_KEY,
        API_KEY: TTS_API_KEY,
        ENGINE: TTS_ENGINE,
        MODEL: TTS_MODEL,
        VOICE: TTS_VOICE,
        SPLIT_ON: TTS_SPLIT_ON,
        AZURE_SPEECH_REGION: TTS_AZURE_SPEECH_REGION,
        AZURE_SPEECH_BASE_URL: TTS_AZURE_SPEECH_BASE_URL,
        AZURE_SPEECH_OUTPUT_FORMAT: TTS_AZURE_SPEECH_OUTPUT_FORMAT
      },
      stt: {
        OPENAI_API_BASE_URL: STT_OPENAI_API_BASE_URL,
        OPENAI_API_KEY: STT_OPENAI_API_KEY,
        ENGINE: STT_ENGINE,
        MODEL: STT_MODEL,
        SUPPORTED_CONTENT_TYPES: supportedContentTypes,
        WHISPER_MODEL: STT_WHISPER_MODEL,
        DEEPGRAM_API_KEY: STT_DEEPGRAM_API_KEY,
        AZURE_API_KEY: STT_AZURE_API_KEY,
        AZURE_REGION: STT_AZURE_REGION,
        AZURE_LOCALES: STT_AZURE_LOCALES,
        AZURE_BASE_URL: STT_AZURE_BASE_URL,
        AZURE_MAX_SPEAKERS: STT_AZURE_MAX_SPEAKERS
      }
    });

    if (res) {
      saveHandler();
      // Re-read the backend config so the global store reflects the saved settings.
      config.set(await getBackendConfig());
    }
  };
  /**
   * Save the config from the Whisper model row while showing a loading state.
   *
   * `STT_WHISPER_MODEL_LOADING` drives the button's spinner and `disabled`
   * attribute. It is reset in `finally` so a failed save cannot leave the
   * button permanently disabled; the error itself still propagates.
   */
  const sttModelUpdateHandler = async () => {
    STT_WHISPER_MODEL_LOADING = true;
    try {
      await updateConfigHandler();
    } finally {
      STT_WHISPER_MODEL_LOADING = false;
    }
  };
  // On mount: load the saved audio config into the form fields, then populate
  // the voice and model pickers for the loaded TTS engine.
  onMount(async () => {
    // Report a failed load via toast (as the voice/model loaders do) instead of
    // leaving an unhandled rejection that would also skip the loaders below.
    const res = await getAudioConfig(localStorage.token).catch((e) => {
      toast.error(`${e}`);
    });

    if (res) {
      // Deliberately not logged: the response contains API keys.
      TTS_OPENAI_API_BASE_URL = res.tts.OPENAI_API_BASE_URL;
      TTS_OPENAI_API_KEY = res.tts.OPENAI_API_KEY;
      TTS_API_KEY = res.tts.API_KEY;

      TTS_ENGINE = res.tts.ENGINE;
      TTS_MODEL = res.tts.MODEL;
      TTS_VOICE = res.tts.VOICE;

      // Fall back to punctuation splitting when the backend has no value stored.
      TTS_SPLIT_ON = res.tts.SPLIT_ON || TTS_RESPONSE_SPLIT.PUNCTUATION;

      TTS_AZURE_SPEECH_REGION = res.tts.AZURE_SPEECH_REGION;
      TTS_AZURE_SPEECH_BASE_URL = res.tts.AZURE_SPEECH_BASE_URL;
      TTS_AZURE_SPEECH_OUTPUT_FORMAT = res.tts.AZURE_SPEECH_OUTPUT_FORMAT;

      STT_OPENAI_API_BASE_URL = res.stt.OPENAI_API_BASE_URL;
      STT_OPENAI_API_KEY = res.stt.OPENAI_API_KEY;

      STT_ENGINE = res.stt.ENGINE;
      STT_MODEL = res.stt.MODEL;
      // The backend stores a list; the form edits it as comma-separated text.
      STT_SUPPORTED_CONTENT_TYPES = (res?.stt?.SUPPORTED_CONTENT_TYPES ?? []).join(',');
      STT_WHISPER_MODEL = res.stt.WHISPER_MODEL;
      STT_AZURE_API_KEY = res.stt.AZURE_API_KEY;
      STT_AZURE_REGION = res.stt.AZURE_REGION;
      STT_AZURE_LOCALES = res.stt.AZURE_LOCALES;
      STT_AZURE_BASE_URL = res.stt.AZURE_BASE_URL;
      STT_AZURE_MAX_SPEAKERS = res.stt.AZURE_MAX_SPEAKERS;
      STT_DEEPGRAM_API_KEY = res.stt.DEEPGRAM_API_KEY;
    }

    // Both loaders branch on TTS_ENGINE, so they must run after it has been set above.
    await getVoices();
    await getModels();
  });
  152. </script>
  153. <form
  154. class="flex flex-col h-full justify-between space-y-3 text-sm"
  155. on:submit|preventDefault={async () => {
  156. await updateConfigHandler();
  157. dispatch('save');
  158. }}
  159. >
  160. <div class=" space-y-3 overflow-y-scroll scrollbar-hidden h-full">
  161. <div class="flex flex-col gap-3">
  162. <div>
  163. <div class=" mb-2.5 text-base font-medium">{$i18n.t('Speech-to-Text')}</div>
  164. <hr class=" border-gray-100 dark:border-gray-850 my-2" />
  165. {#if STT_ENGINE !== 'web'}
  166. <div class="mb-2">
  167. <div class=" mb-1.5 text-xs font-medium">{$i18n.t('Supported MIME Types')}</div>
  168. <div class="flex w-full">
  169. <div class="flex-1">
  170. <input
  171. class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
  172. bind:value={STT_SUPPORTED_CONTENT_TYPES}
  173. placeholder={$i18n.t('e.g., audio/wav,audio/mpeg (leave blank for defaults)')}
  174. />
  175. </div>
  176. </div>
  177. </div>
  178. {/if}
  179. <div class="mb-2 py-0.5 flex w-full justify-between">
  180. <div class=" self-center text-xs font-medium">{$i18n.t('Speech-to-Text Engine')}</div>
  181. <div class="flex items-center relative">
  182. <select
  183. class="dark:bg-gray-900 cursor-pointer w-fit pr-8 rounded-sm px-2 p-1 text-xs bg-transparent outline-hidden text-right"
  184. bind:value={STT_ENGINE}
  185. placeholder="Select an engine"
  186. >
  187. <option value="">{$i18n.t('Whisper (Local)')}</option>
  188. <option value="openai">OpenAI</option>
  189. <option value="web">{$i18n.t('Web API')}</option>
  190. <option value="deepgram">Deepgram</option>
  191. <option value="azure">Azure AI Speech</option>
  192. </select>
  193. </div>
  194. </div>
  195. {#if STT_ENGINE === 'openai'}
  196. <div>
  197. <div class="mt-1 flex gap-2 mb-1">
  198. <input
  199. class="flex-1 w-full bg-transparent outline-hidden"
  200. placeholder={$i18n.t('API Base URL')}
  201. bind:value={STT_OPENAI_API_BASE_URL}
  202. required
  203. />
  204. <SensitiveInput placeholder={$i18n.t('API Key')} bind:value={STT_OPENAI_API_KEY} />
  205. </div>
  206. </div>
  207. <hr class="border-gray-100 dark:border-gray-850 my-2" />
  208. <div>
  209. <div class=" mb-1.5 text-xs font-medium">{$i18n.t('STT Model')}</div>
  210. <div class="flex w-full">
  211. <div class="flex-1">
  212. <input
  213. list="model-list"
  214. class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
  215. bind:value={STT_MODEL}
  216. placeholder="Select a model"
  217. />
  218. <datalist id="model-list">
  219. <option value="whisper-1" />
  220. </datalist>
  221. </div>
  222. </div>
  223. </div>
  224. {:else if STT_ENGINE === 'deepgram'}
  225. <div>
  226. <div class="mt-1 flex gap-2 mb-1">
  227. <SensitiveInput placeholder={$i18n.t('API Key')} bind:value={STT_DEEPGRAM_API_KEY} />
  228. </div>
  229. </div>
  230. <hr class="border-gray-100 dark:border-gray-850 my-2" />
  231. <div>
  232. <div class=" mb-1.5 text-xs font-medium">{$i18n.t('STT Model')}</div>
  233. <div class="flex w-full">
  234. <div class="flex-1">
  235. <input
  236. class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
  237. bind:value={STT_MODEL}
  238. placeholder="Select a model (optional)"
  239. />
  240. </div>
  241. </div>
  242. <div class="mt-2 mb-1 text-xs text-gray-400 dark:text-gray-500">
  243. {$i18n.t('Leave model field empty to use the default model.')}
  244. <a
  245. class=" hover:underline dark:text-gray-200 text-gray-800"
  246. href="https://developers.deepgram.com/docs/models"
  247. target="_blank"
  248. >
  249. {$i18n.t('Click here to see available models.')}
  250. </a>
  251. </div>
  252. </div>
  253. {:else if STT_ENGINE === 'azure'}
  254. <div>
  255. <div class="mt-1 flex gap-2 mb-1">
  256. <SensitiveInput
  257. placeholder={$i18n.t('API Key')}
  258. bind:value={STT_AZURE_API_KEY}
  259. required
  260. />
  261. </div>
  262. <hr class="border-gray-100 dark:border-gray-850 my-2" />
  263. <div>
  264. <div class=" mb-1.5 text-xs font-medium">{$i18n.t('Azure Region')}</div>
  265. <div class="flex w-full">
  266. <div class="flex-1">
  267. <input
  268. class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
  269. bind:value={STT_AZURE_REGION}
  270. placeholder={$i18n.t('e.g., westus (leave blank for eastus)')}
  271. />
  272. </div>
  273. </div>
  274. </div>
  275. <div>
  276. <div class=" mb-1.5 text-xs font-medium">{$i18n.t('Language Locales')}</div>
  277. <div class="flex w-full">
  278. <div class="flex-1">
  279. <input
  280. class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
  281. bind:value={STT_AZURE_LOCALES}
  282. placeholder={$i18n.t('e.g., en-US,ja-JP (leave blank for auto-detect)')}
  283. />
  284. </div>
  285. </div>
  286. </div>
  287. <div>
  288. <div class=" mb-1.5 text-xs font-medium">{$i18n.t('Endpoint URL')}</div>
  289. <div class="flex w-full">
  290. <div class="flex-1">
  291. <input
  292. class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
  293. bind:value={STT_AZURE_BASE_URL}
  294. placeholder={$i18n.t('(leave blank for to use commercial endpoint)')}
  295. />
  296. </div>
  297. </div>
  298. </div>
  299. <div>
  300. <div class=" mb-1.5 text-xs font-medium">{$i18n.t('Max Speakers')}</div>
  301. <div class="flex w-full">
  302. <div class="flex-1">
  303. <input
  304. class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
  305. bind:value={STT_AZURE_MAX_SPEAKERS}
  306. placeholder={$i18n.t('e.g., 3, 4, 5 (leave blank for default)')}
  307. />
  308. </div>
  309. </div>
  310. </div>
  311. </div>
  312. {:else if STT_ENGINE === ''}
  313. <div>
  314. <div class=" mb-1.5 text-xs font-medium">{$i18n.t('STT Model')}</div>
  315. <div class="flex w-full">
  316. <div class="flex-1 mr-2">
  317. <input
  318. class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
  319. placeholder={$i18n.t('Set whisper model')}
  320. bind:value={STT_WHISPER_MODEL}
  321. />
  322. </div>
  <!--
    Saves the config from the Whisper model row. type="button" is required:
    inside the form a button defaults to type="submit", so a click would also
    submit the form and trigger a second, concurrent config update.
  -->
  <button
    type="button"
    class="px-2.5 bg-gray-50 hover:bg-gray-200 text-gray-800 dark:bg-gray-850 dark:hover:bg-gray-800 dark:text-gray-100 rounded-lg transition"
    on:click={() => {
      sttModelUpdateHandler();
    }}
    disabled={STT_WHISPER_MODEL_LOADING}
  >
    {#if STT_WHISPER_MODEL_LOADING}
      <!-- Spinner shown while the save is in flight -->
      <div class="self-center">
        <svg
          class=" w-4 h-4"
          viewBox="0 0 24 24"
          fill="currentColor"
          xmlns="http://www.w3.org/2000/svg"
        >
          <style>
            .spinner_ajPY {
              transform-origin: center;
              animation: spinner_AtaB 0.75s infinite linear;
            }
            @keyframes spinner_AtaB {
              100% {
                transform: rotate(360deg);
              }
            }
          </style>
          <path
            d="M12,1A11,11,0,1,0,23,12,11,11,0,0,0,12,1Zm0,19a8,8,0,1,1,8-8A8,8,0,0,1,12,20Z"
            opacity=".25"
          />
          <path
            d="M10.14,1.16a11,11,0,0,0-9,8.92A1.59,1.59,0,0,0,2.46,12,1.52,1.52,0,0,0,4.11,10.7a8,8,0,0,1,6.66-6.61A1.42,1.42,0,0,0,12,2.69h0A1.57,1.57,0,0,0,10.14,1.16Z"
            class="spinner_ajPY"
          />
        </svg>
      </div>
    {:else}
      <!-- Download icon shown when idle -->
      <svg
        xmlns="http://www.w3.org/2000/svg"
        viewBox="0 0 16 16"
        fill="currentColor"
        class="w-4 h-4"
      >
        <path
          d="M8.75 2.75a.75.75 0 0 0-1.5 0v5.69L5.03 6.22a.75.75 0 0 0-1.06 1.06l3.5 3.5a.75.75 0 0 0 1.06 0l3.5-3.5a.75.75 0 0 0-1.06-1.06L8.75 8.44V2.75Z"
        />
        <path
          d="M3.5 9.75a.75.75 0 0 0-1.5 0v1.5A2.75 2.75 0 0 0 4.75 14h6.5A2.75 2.75 0 0 0 14 11.25v-1.5a.75.75 0 0 0-1.5 0v1.5c0 .69-.56 1.25-1.25 1.25h-6.5c-.69 0-1.25-.56-1.25-1.25v-1.5Z"
        />
      </svg>
    {/if}
  </button>
  375. </div>
  376. <div class="mt-2 mb-1 text-xs text-gray-400 dark:text-gray-500">
  377. {$i18n.t(`Open WebUI uses faster-whisper internally.`)}
  378. <a
  379. class=" hover:underline dark:text-gray-200 text-gray-800"
  380. href="https://github.com/SYSTRAN/faster-whisper"
  381. target="_blank"
  382. >
  383. {$i18n.t(
  384. `Click here to learn more about faster-whisper and see the available models.`
  385. )}
  386. </a>
  387. </div>
  388. </div>
  389. {/if}
  390. </div>
  391. <div>
  392. <div class=" mb-2.5 text-base font-medium">{$i18n.t('Text-to-Speech')}</div>
  393. <hr class=" border-gray-100 dark:border-gray-850 my-2" />
  394. <div class="mb-2 py-0.5 flex w-full justify-between">
  395. <div class=" self-center text-xs font-medium">{$i18n.t('Text-to-Speech Engine')}</div>
  396. <div class="flex items-center relative">
  397. <select
  398. class=" dark:bg-gray-900 w-fit pr-8 cursor-pointer rounded-sm px-2 p-1 text-xs bg-transparent outline-hidden text-right"
  399. bind:value={TTS_ENGINE}
  400. placeholder="Select a mode"
  401. on:change={async (e) => {
  402. await updateConfigHandler();
  403. await getVoices();
  404. await getModels();
  405. if (e.target?.value === 'openai') {
  406. TTS_VOICE = 'alloy';
  407. TTS_MODEL = 'tts-1';
  408. } else {
  409. TTS_VOICE = '';
  410. TTS_MODEL = '';
  411. }
  412. }}
  413. >
  414. <option value="">{$i18n.t('Web API')}</option>
  415. <option value="transformers">{$i18n.t('Transformers')} ({$i18n.t('Local')})</option>
  416. <option value="openai">{$i18n.t('OpenAI')}</option>
  417. <option value="elevenlabs">{$i18n.t('ElevenLabs')}</option>
  418. <option value="azure">{$i18n.t('Azure AI Speech')}</option>
  419. </select>
  420. </div>
  421. </div>
  422. {#if TTS_ENGINE === 'openai'}
  423. <div>
  424. <div class="mt-1 flex gap-2 mb-1">
  425. <input
  426. class="flex-1 w-full bg-transparent outline-hidden"
  427. placeholder={$i18n.t('API Base URL')}
  428. bind:value={TTS_OPENAI_API_BASE_URL}
  429. required
  430. />
  431. <SensitiveInput placeholder={$i18n.t('API Key')} bind:value={TTS_OPENAI_API_KEY} />
  432. </div>
  433. </div>
  434. {:else if TTS_ENGINE === 'elevenlabs'}
  435. <div>
  436. <div class="mt-1 flex gap-2 mb-1">
  437. <input
  438. class="flex-1 w-full rounded-lg py-2 pl-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
  439. placeholder={$i18n.t('API Key')}
  440. bind:value={TTS_API_KEY}
  441. required
  442. />
  443. </div>
  444. </div>
  445. {:else if TTS_ENGINE === 'azure'}
  446. <div>
  447. <div class="mt-1 flex gap-2 mb-1">
  448. <SensitiveInput placeholder={$i18n.t('API Key')} bind:value={TTS_API_KEY} required />
  449. </div>
  450. <hr class="border-gray-100 dark:border-gray-850 my-2" />
  451. <div>
  452. <div class=" mb-1.5 text-xs font-medium">{$i18n.t('Azure Region')}</div>
  453. <div class="flex w-full">
  454. <div class="flex-1">
  455. <input
  456. class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
  457. bind:value={TTS_AZURE_SPEECH_REGION}
  458. placeholder={$i18n.t('e.g., westus (leave blank for eastus)')}
  459. />
  460. </div>
  461. </div>
  462. </div>
  463. <div>
  464. <div class=" mb-1.5 text-xs font-medium">{$i18n.t('Endpoint URL')}</div>
  465. <div class="flex w-full">
  466. <div class="flex-1">
  467. <input
  468. class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
  469. bind:value={TTS_AZURE_SPEECH_BASE_URL}
  470. placeholder={$i18n.t('(leave blank for to use commercial endpoint)')}
  471. />
  472. </div>
  473. </div>
  474. </div>
  475. </div>
  476. {/if}
  477. <div class="mb-2">
  478. {#if TTS_ENGINE === ''}
  479. <div>
  480. <div class=" mb-1.5 text-xs font-medium">{$i18n.t('TTS Voice')}</div>
  481. <div class="flex w-full">
  482. <div class="flex-1">
  <!--
    Browser (Web API) voice picker. The "Default" entry represents an empty
    TTS_VOICE, so it is the one marked selected when TTS_VOICE === ''.
  -->
  <select
    class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
    bind:value={TTS_VOICE}
  >
    <option value="" selected={TTS_VOICE === ''}>{$i18n.t('Default')}</option>
    {#each voices as voice}
      <option
        value={voice.voiceURI}
        class="bg-gray-100 dark:bg-gray-700"
        selected={TTS_VOICE === voice.voiceURI}
      >
        {voice.name}
      </option>
    {/each}
  </select>
  496. </div>
  497. </div>
  498. </div>
  499. {:else if TTS_ENGINE === 'transformers'}
  500. <div>
  501. <div class=" mb-1.5 text-xs font-medium">{$i18n.t('TTS Model')}</div>
  502. <div class="flex w-full">
  <div class="flex-1">
    <!--
      The datalist id must be unique in the document. The Speech-to-Text OpenAI
      section also renders a datalist with id "model-list"; when both sections
      are visible this input would otherwise resolve to that one.
    -->
    <input
      list="tts-transformers-model-list"
      class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
      bind:value={TTS_MODEL}
      placeholder="CMU ARCTIC speaker embedding name"
    />
    <datalist id="tts-transformers-model-list">
      <option value="tts-1" />
    </datalist>
  </div>
  514. </div>
  515. <div class="mt-2 mb-1 text-xs text-gray-400 dark:text-gray-500">
  516. {$i18n.t(`Open WebUI uses SpeechT5 and CMU Arctic speaker embeddings.`)}
  517. To learn more about SpeechT5,
  518. <a
  519. class=" hover:underline dark:text-gray-200 text-gray-800"
  520. href="https://github.com/microsoft/SpeechT5"
  521. target="_blank"
  522. >
  523. {$i18n.t(`click here`, {
  524. name: 'SpeechT5'
  525. })}.
  526. </a>
  527. To see the available CMU Arctic speaker embeddings,
  528. <a
  529. class=" hover:underline dark:text-gray-200 text-gray-800"
  530. href="https://huggingface.co/datasets/Matthijs/cmu-arctic-xvectors"
  531. target="_blank"
  532. >
  533. {$i18n.t(`click here`)}.
  534. </a>
  535. </div>
  536. </div>
  537. {:else if TTS_ENGINE === 'openai'}
  538. <div class=" flex gap-2">
  539. <div class="w-full">
  540. <div class=" mb-1.5 text-xs font-medium">{$i18n.t('TTS Voice')}</div>
  541. <div class="flex w-full">
  542. <div class="flex-1">
  543. <input
  544. list="voice-list"
  545. class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
  546. bind:value={TTS_VOICE}
  547. placeholder="Select a voice"
  548. />
  549. <datalist id="voice-list">
  550. {#each voices as voice}
  551. <option value={voice.id}>{voice.name}</option>
  552. {/each}
  553. </datalist>
  554. </div>
  555. </div>
  556. </div>
  557. <div class="w-full">
  558. <div class=" mb-1.5 text-xs font-medium">{$i18n.t('TTS Model')}</div>
  559. <div class="flex w-full">
  560. <div class="flex-1">
  561. <input
  562. list="tts-model-list"
  563. class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
  564. bind:value={TTS_MODEL}
  565. placeholder="Select a model"
  566. />
  567. <datalist id="tts-model-list">
  568. {#each models as model}
  569. <option value={model.id} class="bg-gray-50 dark:bg-gray-700" />
  570. {/each}
  571. </datalist>
  572. </div>
  573. </div>
  574. </div>
  575. </div>
  576. {:else if TTS_ENGINE === 'elevenlabs'}
  577. <div class=" flex gap-2">
  578. <div class="w-full">
  579. <div class=" mb-1.5 text-xs font-medium">{$i18n.t('TTS Voice')}</div>
  580. <div class="flex w-full">
  581. <div class="flex-1">
  582. <input
  583. list="voice-list"
  584. class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
  585. bind:value={TTS_VOICE}
  586. placeholder="Select a voice"
  587. />
  588. <datalist id="voice-list">
  589. {#each voices as voice}
  590. <option value={voice.id}>{voice.name}</option>
  591. {/each}
  592. </datalist>
  593. </div>
  594. </div>
  595. </div>
  596. <div class="w-full">
  597. <div class=" mb-1.5 text-xs font-medium">{$i18n.t('TTS Model')}</div>
  598. <div class="flex w-full">
  599. <div class="flex-1">
  600. <input
  601. list="tts-model-list"
  602. class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
  603. bind:value={TTS_MODEL}
  604. placeholder="Select a model"
  605. />
  606. <datalist id="tts-model-list">
  607. {#each models as model}
  608. <option value={model.id} class="bg-gray-50 dark:bg-gray-700" />
  609. {/each}
  610. </datalist>
  611. </div>
  612. </div>
  613. </div>
  614. </div>
  615. {:else if TTS_ENGINE === 'azure'}
  616. <div class=" flex gap-2">
  617. <div class="w-full">
  618. <div class=" mb-1.5 text-xs font-medium">{$i18n.t('TTS Voice')}</div>
  619. <div class="flex w-full">
  620. <div class="flex-1">
  621. <input
  622. list="voice-list"
  623. class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
  624. bind:value={TTS_VOICE}
  625. placeholder="Select a voice"
  626. />
  627. <datalist id="voice-list">
  628. {#each voices as voice}
  629. <option value={voice.id}>{voice.name}</option>
  630. {/each}
  631. </datalist>
  632. </div>
  633. </div>
  634. </div>
  635. <div class="w-full">
  636. <div class=" mb-1.5 text-xs font-medium">
  637. {$i18n.t('Output format')}
  638. <a
  639. href="https://learn.microsoft.com/en-us/azure/ai-services/speech-service/rest-text-to-speech?tabs=streaming#audio-outputs"
  640. target="_blank"
  641. >
  642. <small>{$i18n.t('Available list')}</small>
  643. </a>
  644. </div>
  645. <div class="flex w-full">
  646. <div class="flex-1">
  647. <input
  648. list="tts-model-list"
  649. class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
  650. bind:value={TTS_AZURE_SPEECH_OUTPUT_FORMAT}
  651. placeholder="Select a output format"
  652. />
  653. </div>
  654. </div>
  655. </div>
  656. </div>
  657. {/if}
  658. </div>
  659. <div class="pt-0.5 flex w-full justify-between">
  660. <div class="self-center text-xs font-medium">{$i18n.t('Response splitting')}</div>
  661. <div class="flex items-center relative">
  662. <select
  663. class="dark:bg-gray-900 w-fit pr-8 cursor-pointer rounded-sm px-2 p-1 text-xs bg-transparent outline-hidden text-right"
  664. aria-label="Select how to split message text for TTS requests"
  665. bind:value={TTS_SPLIT_ON}
  666. >
  667. {#each Object.values(TTS_RESPONSE_SPLIT) as split}
  668. <option value={split}
  669. >{$i18n.t(split.charAt(0).toUpperCase() + split.slice(1))}</option
  670. >
  671. {/each}
  672. </select>
  673. </div>
  674. </div>
  675. <div class="mt-2 mb-1 text-xs text-gray-400 dark:text-gray-500">
  676. {$i18n.t(
  677. "Control how message text is split for TTS requests. 'Punctuation' splits into sentences, 'paragraphs' splits into paragraphs, and 'none' keeps the message as a single string."
  678. )}
  679. </div>
  680. </div>
  681. </div>
  682. </div>
  683. <div class="flex justify-end text-sm font-medium">
  684. <button
  685. class="px-3.5 py-1.5 text-sm font-medium bg-black hover:bg-gray-900 text-white dark:bg-white dark:text-black dark:hover:bg-gray-100 transition rounded-full"
  686. type="submit"
  687. >
  688. {$i18n.t('Save')}
  689. </button>
  690. </div>
  691. </form>