Audio.svelte 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712
  1. <script lang="ts">
  2. import { toast } from 'svelte-sonner';
  3. import { createEventDispatcher, onMount, getContext } from 'svelte';
  4. const dispatch = createEventDispatcher();
  5. import { getBackendConfig } from '$lib/apis';
  6. import {
  7. getAudioConfig,
  8. updateAudioConfig,
  9. getModels as _getModels,
  10. getVoices as _getVoices
  11. } from '$lib/apis/audio';
  12. import { config, settings } from '$lib/stores';
  13. import SensitiveInput from '$lib/components/common/SensitiveInput.svelte';
  14. import { TTS_RESPONSE_SPLIT } from '$lib/types';
  15. import type { Writable } from 'svelte/store';
  16. import type { i18n as i18nType } from 'i18next';
  17. const i18n = getContext<Writable<i18nType>>('i18n');
  18. export let saveHandler: () => void;
  // ── Audio settings form state ──────────────────────────────────────────────
  // Every field starts empty and is overwritten with the server-side audio
  // configuration once the component has mounted.

  // Text-to-speech (TTS)
  let TTS_ENGINE = '';
  let TTS_MODEL = '';
  let TTS_VOICE = '';
  let TTS_SPLIT_ON: TTS_RESPONSE_SPLIT = TTS_RESPONSE_SPLIT.PUNCTUATION;
  let TTS_OPENAI_API_BASE_URL = '';
  let TTS_OPENAI_API_KEY = '';
  let TTS_API_KEY = '';
  let TTS_AZURE_SPEECH_REGION = '';
  let TTS_AZURE_SPEECH_OUTPUT_FORMAT = '';

  // Speech-to-text (STT)
  let STT_ENGINE = '';
  let STT_MODEL = '';
  let STT_WHISPER_MODEL = '';
  // True while a whisper model update request is in flight (drives the spinner).
  let STT_WHISPER_MODEL_LOADING = false;
  let STT_OPENAI_API_BASE_URL = '';
  let STT_OPENAI_API_KEY = '';
  let STT_DEEPGRAM_API_KEY = '';
  let STT_AZURE_API_KEY = '';
  let STT_AZURE_REGION = '';
  let STT_AZURE_LOCALES = '';
  let STT_AZURE_BASE_URL = '';
  let STT_AZURE_MAX_SPEAKERS = '';

  // Option lists rendered by the voice / model pickers.
  // eslint-disable-next-line no-undef
  let voices: SpeechSynthesisVoice[] = [];
  let models: Awaited<ReturnType<typeof _getModels>>['models'] = [];
  /**
   * Refresh `models`, the list offered by the TTS model pickers.
   *
   * When `TTS_ENGINE` is the empty string the list is simply cleared; for any
   * other engine the list is requested via `_getModels`. A failed request is
   * reported as an error toast and leaves `models` untouched.
   */
  const getModels = async () => {
    // Empty engine value: nothing to fetch, just reset the list.
    if (TTS_ENGINE === '') {
      models = [];
      return;
    }

    const token = localStorage.token;
    const directConnections =
      $config?.features?.enable_direct_connections && ($settings?.directConnections ?? null);

    let response;
    try {
      response = await _getModels(token, directConnections);
    } catch (err) {
      toast.error(`${err}`);
    }

    if (!response) {
      return;
    }

    console.log(response);
    models = response.models;
  };
  // Handle of the browser-voice polling interval that is currently running, if
  // any. It is tracked so that a later getVoices() call can cancel it instead of
  // letting intervals pile up or overwrite voices fetched for another engine.
  let voicesPollTimer: ReturnType<typeof setInterval> | undefined;

  /**
   * Refresh `voices`, the list offered by the TTS voice pickers.
   *
   * - Empty `TTS_ENGINE`: reads the voices from the browser's `speechSynthesis`.
   *   The list may be empty at first, so it is polled every 100 ms until at
   *   least one voice is reported. The function itself returns right after
   *   starting the poll.
   * - Any other engine: requests the voices via `_getVoices`; a failed request
   *   is reported as an error toast and leaves `voices` untouched.
   *
   * In both cases the resulting list is sorted by name using the active UI
   * language's collation.
   */
  const getVoices = async () => {
    // Cancel a poll left over from an earlier call; otherwise it could keep
    // running and replace the list produced by this call.
    if (voicesPollTimer !== undefined) {
      clearInterval(voicesPollTimer);
      voicesPollTimer = undefined;
    }

    if (TTS_ENGINE === '') {
      // Without the Web Speech API there is nothing to poll; polling anyway
      // would throw a ReferenceError on every tick.
      if (typeof speechSynthesis === 'undefined') {
        voices = [];
        return;
      }

      const timer = setInterval(() => {
        voices = speechSynthesis.getVoices();
        if (voices.length > 0) {
          clearInterval(timer);
          if (voicesPollTimer === timer) {
            voicesPollTimer = undefined;
          }
          voices.sort((a, b) => a.name.localeCompare(b.name, $i18n.resolvedLanguage));
        }
      }, 100);
      voicesPollTimer = timer;
    } else {
      const res = await _getVoices(localStorage.token).catch((e) => {
        toast.error(`${e}`);
      });
      if (res) {
        console.log(res);
        voices = res.voices;
        voices.sort((a, b) => a.name.localeCompare(b.name, $i18n.resolvedLanguage));
      }
    }
  };
  /**
   * Send the current form state to the backend via `updateAudioConfig`.
   *
   * When the call yields a truthy result, `saveHandler` is invoked and the
   * global `config` store is replaced with a freshly fetched backend config.
   * Rejections are not caught here and propagate to the caller.
   */
  const updateConfigHandler = async () => {
    const tts = {
      OPENAI_API_BASE_URL: TTS_OPENAI_API_BASE_URL,
      OPENAI_API_KEY: TTS_OPENAI_API_KEY,
      API_KEY: TTS_API_KEY,
      ENGINE: TTS_ENGINE,
      MODEL: TTS_MODEL,
      VOICE: TTS_VOICE,
      SPLIT_ON: TTS_SPLIT_ON,
      AZURE_SPEECH_REGION: TTS_AZURE_SPEECH_REGION,
      AZURE_SPEECH_OUTPUT_FORMAT: TTS_AZURE_SPEECH_OUTPUT_FORMAT
    };

    const stt = {
      OPENAI_API_BASE_URL: STT_OPENAI_API_BASE_URL,
      OPENAI_API_KEY: STT_OPENAI_API_KEY,
      ENGINE: STT_ENGINE,
      MODEL: STT_MODEL,
      WHISPER_MODEL: STT_WHISPER_MODEL,
      DEEPGRAM_API_KEY: STT_DEEPGRAM_API_KEY,
      AZURE_API_KEY: STT_AZURE_API_KEY,
      AZURE_REGION: STT_AZURE_REGION,
      AZURE_LOCALES: STT_AZURE_LOCALES,
      AZURE_BASE_URL: STT_AZURE_BASE_URL,
      AZURE_MAX_SPEAKERS: STT_AZURE_MAX_SPEAKERS
    };

    const saved = await updateAudioConfig(localStorage.token, { tts, stt });
    if (!saved) {
      return;
    }

    saveHandler();
    // Re-read the backend config so the global store reflects what was saved.
    config.set(await getBackendConfig());
  };
  /**
   * Save the configuration while flagging the whisper model as "loading".
   *
   * `STT_WHISPER_MODEL_LOADING` drives the button's spinner and disabled
   * state. It is reset in a `finally` block so that a failed save cannot leave
   * the button disabled and spinning forever; the error itself still
   * propagates to the caller.
   */
  const sttModelUpdateHandler = async () => {
    STT_WHISPER_MODEL_LOADING = true;
    try {
      await updateConfigHandler();
    } finally {
      STT_WHISPER_MODEL_LOADING = false;
    }
  };
  /**
   * On mount: load the stored audio configuration into the form fields, then
   * populate the voice and model pickers for the configured TTS engine.
   *
   * A failed config request is reported as an error toast (matching the other
   * fetch helpers in this component) and the form keeps its empty defaults;
   * voices and models are still loaded afterwards. The response is
   * deliberately not logged because it contains API keys.
   */
  onMount(async () => {
    const res = await getAudioConfig(localStorage.token).catch((e) => {
      toast.error(`${e}`);
    });

    if (res) {
      const { tts, stt } = res;

      TTS_OPENAI_API_BASE_URL = tts.OPENAI_API_BASE_URL;
      TTS_OPENAI_API_KEY = tts.OPENAI_API_KEY;
      TTS_API_KEY = tts.API_KEY;
      TTS_ENGINE = tts.ENGINE;
      TTS_MODEL = tts.MODEL;
      TTS_VOICE = tts.VOICE;
      // `||` on purpose: an empty string must also fall back to the default.
      TTS_SPLIT_ON = tts.SPLIT_ON || TTS_RESPONSE_SPLIT.PUNCTUATION;
      TTS_AZURE_SPEECH_OUTPUT_FORMAT = tts.AZURE_SPEECH_OUTPUT_FORMAT;
      TTS_AZURE_SPEECH_REGION = tts.AZURE_SPEECH_REGION;

      STT_OPENAI_API_BASE_URL = stt.OPENAI_API_BASE_URL;
      STT_OPENAI_API_KEY = stt.OPENAI_API_KEY;
      STT_ENGINE = stt.ENGINE;
      STT_MODEL = stt.MODEL;
      STT_WHISPER_MODEL = stt.WHISPER_MODEL;
      STT_AZURE_API_KEY = stt.AZURE_API_KEY;
      STT_AZURE_REGION = stt.AZURE_REGION;
      STT_AZURE_LOCALES = stt.AZURE_LOCALES;
      STT_AZURE_BASE_URL = stt.AZURE_BASE_URL;
      STT_AZURE_MAX_SPEAKERS = stt.AZURE_MAX_SPEAKERS;
      STT_DEEPGRAM_API_KEY = stt.DEEPGRAM_API_KEY;
    }

    // Both helpers read TTS_ENGINE, so they must run after it has been set.
    await getVoices();
    await getModels();
  });
  146. </script>
  147. <form
  148. class="flex flex-col h-full justify-between space-y-3 text-sm"
  149. on:submit|preventDefault={async () => {
  150. await updateConfigHandler();
  151. dispatch('save');
  152. }}
  153. >
  154. <div class=" space-y-3 overflow-y-scroll scrollbar-hidden h-full">
  155. <div class="flex flex-col gap-3">
  156. <div>
  157. <div class=" mb-1 text-sm font-medium">{$i18n.t('STT Settings')}</div>
  158. <div class=" py-0.5 flex w-full justify-between">
  159. <div class=" self-center text-xs font-medium">{$i18n.t('Speech-to-Text Engine')}</div>
  160. <div class="flex items-center relative">
  161. <select
  162. class="dark:bg-gray-900 cursor-pointer w-fit pr-8 rounded-sm px-2 p-1 text-xs bg-transparent outline-hidden text-right"
  163. bind:value={STT_ENGINE}
  164. placeholder="Select an engine"
  165. >
  166. <option value="">{$i18n.t('Whisper (Local)')}</option>
  167. <option value="openai">OpenAI</option>
  168. <option value="web">{$i18n.t('Web API')}</option>
  169. <option value="deepgram">Deepgram</option>
  170. <option value="azure">Azure AI Speech</option>
  171. </select>
  172. </div>
  173. </div>
  174. {#if STT_ENGINE === 'openai'}
  175. <div>
  176. <div class="mt-1 flex gap-2 mb-1">
  177. <input
  178. class="flex-1 w-full bg-transparent outline-hidden"
  179. placeholder={$i18n.t('API Base URL')}
  180. bind:value={STT_OPENAI_API_BASE_URL}
  181. required
  182. />
  183. <SensitiveInput placeholder={$i18n.t('API Key')} bind:value={STT_OPENAI_API_KEY} />
  184. </div>
  185. </div>
  186. <hr class="border-gray-100 dark:border-gray-850 my-2" />
  187. <div>
  188. <div class=" mb-1.5 text-sm font-medium">{$i18n.t('STT Model')}</div>
  189. <div class="flex w-full">
  <div class="flex-1">
    <!--
      The datalist id is specific to STT: the TTS "transformers" section uses
      the id "model-list" as well, and both sections can be rendered at the
      same time, which would produce a duplicate id and wrong suggestions.
    -->
    <input
      list="stt-model-list"
      class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
      bind:value={STT_MODEL}
      placeholder="Select a model"
    />
    <datalist id="stt-model-list">
      <option value="whisper-1" />
    </datalist>
  </div>
  201. </div>
  202. </div>
  203. {:else if STT_ENGINE === 'deepgram'}
  204. <div>
  205. <div class="mt-1 flex gap-2 mb-1">
  206. <SensitiveInput placeholder={$i18n.t('API Key')} bind:value={STT_DEEPGRAM_API_KEY} />
  207. </div>
  208. </div>
  209. <hr class="border-gray-100 dark:border-gray-850 my-2" />
  210. <div>
  211. <div class=" mb-1.5 text-sm font-medium">{$i18n.t('STT Model')}</div>
  212. <div class="flex w-full">
  213. <div class="flex-1">
  214. <input
  215. class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
  216. bind:value={STT_MODEL}
  217. placeholder="Select a model (optional)"
  218. />
  219. </div>
  220. </div>
  221. <div class="mt-2 mb-1 text-xs text-gray-400 dark:text-gray-500">
  222. {$i18n.t('Leave model field empty to use the default model.')}
  223. <a
  224. class=" hover:underline dark:text-gray-200 text-gray-800"
  225. href="https://developers.deepgram.com/docs/models"
  226. target="_blank"
  227. >
  228. {$i18n.t('Click here to see available models.')}
  229. </a>
  230. </div>
  231. </div>
  232. {:else if STT_ENGINE === 'azure'}
  233. <div>
  234. <div class="mt-1 flex gap-2 mb-1">
  235. <SensitiveInput
  236. placeholder={$i18n.t('API Key')}
  237. bind:value={STT_AZURE_API_KEY}
  238. required
  239. />
  240. <input
  241. class="flex-1 w-full rounded-lg py-2 pl-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
  242. placeholder={$i18n.t('Azure Region')}
  243. bind:value={STT_AZURE_REGION}
  244. required
  245. />
  246. </div>
  247. <hr class="border-gray-100 dark:border-gray-850 my-2" />
  248. <div>
  249. <div class=" mb-1.5 text-sm font-medium">{$i18n.t('Language Locales')}</div>
  250. <div class="flex w-full">
  251. <div class="flex-1">
  252. <input
  253. class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
  254. bind:value={STT_AZURE_LOCALES}
  255. placeholder={$i18n.t('e.g., en-US,ja-JP (leave blank for auto-detect)')}
  256. />
  257. </div>
  258. </div>
  259. </div>
  260. <div>
  261. <div class=" mb-1.5 text-sm font-medium">{$i18n.t('Base URL')}</div>
  262. <div class="flex w-full">
  263. <div class="flex-1">
  264. <input
  265. class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
  266. bind:value={STT_AZURE_BASE_URL}
  267. placeholder={$i18n.t('(leave blank for Azure Commercial URL auto-generation)')}
  268. />
  269. </div>
  270. </div>
  271. </div>
  272. <div>
  273. <div class=" mb-1.5 text-sm font-medium">{$i18n.t('Max Speakers')}</div>
  274. <div class="flex w-full">
  275. <div class="flex-1">
  276. <input
  277. class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
  278. bind:value={STT_AZURE_MAX_SPEAKERS}
  279. placeholder={$i18n.t('e.g., 3, 4, 5 (leave blank for default)')}
  280. />
  281. </div>
  282. </div>
  283. </div>
  284. </div>
  285. {:else if STT_ENGINE === ''}
  286. <div>
  287. <div class=" mb-1.5 text-sm font-medium">{$i18n.t('STT Model')}</div>
  288. <div class="flex w-full">
  289. <div class="flex-1 mr-2">
  290. <input
  291. class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
  292. placeholder={$i18n.t('Set whisper model')}
  293. bind:value={STT_WHISPER_MODEL}
  294. />
  295. </div>
  <!--
    type="button" is required: a button inside a form defaults to
    type="submit", so without it one click would run sttModelUpdateHandler and
    also submit the form (saving a second time and dispatching "save").
  -->
  <button
    type="button"
    class="px-2.5 bg-gray-50 hover:bg-gray-200 text-gray-800 dark:bg-gray-850 dark:hover:bg-gray-800 dark:text-gray-100 rounded-lg transition"
    on:click={() => {
      sttModelUpdateHandler();
    }}
    disabled={STT_WHISPER_MODEL_LOADING}
  >
    {#if STT_WHISPER_MODEL_LOADING}
      <!-- Spinner shown while the update request is in flight -->
      <div class="self-center">
        <svg
          class=" w-4 h-4"
          viewBox="0 0 24 24"
          fill="currentColor"
          xmlns="http://www.w3.org/2000/svg"
        >
          <style>
            .spinner_ajPY {
              transform-origin: center;
              animation: spinner_AtaB 0.75s infinite linear;
            }
            @keyframes spinner_AtaB {
              100% {
                transform: rotate(360deg);
              }
            }
          </style>
          <path
            d="M12,1A11,11,0,1,0,23,12,11,11,0,0,0,12,1Zm0,19a8,8,0,1,1,8-8A8,8,0,0,1,12,20Z"
            opacity=".25"
          />
          <path
            d="M10.14,1.16a11,11,0,0,0-9,8.92A1.59,1.59,0,0,0,2.46,12,1.52,1.52,0,0,0,4.11,10.7a8,8,0,0,1,6.66-6.61A1.42,1.42,0,0,0,12,2.69h0A1.57,1.57,0,0,0,10.14,1.16Z"
            class="spinner_ajPY"
          />
        </svg>
      </div>
    {:else}
      <!-- Download icon shown while idle -->
      <svg
        xmlns="http://www.w3.org/2000/svg"
        viewBox="0 0 16 16"
        fill="currentColor"
        class="w-4 h-4"
      >
        <path
          d="M8.75 2.75a.75.75 0 0 0-1.5 0v5.69L5.03 6.22a.75.75 0 0 0-1.06 1.06l3.5 3.5a.75.75 0 0 0 1.06 0l3.5-3.5a.75.75 0 0 0-1.06-1.06L8.75 8.44V2.75Z"
        />
        <path
          d="M3.5 9.75a.75.75 0 0 0-1.5 0v1.5A2.75 2.75 0 0 0 4.75 14h6.5A2.75 2.75 0 0 0 14 11.25v-1.5a.75.75 0 0 0-1.5 0v1.5c0 .69-.56 1.25-1.25 1.25h-6.5c-.69 0-1.25-.56-1.25-1.25v-1.5Z"
        />
      </svg>
    {/if}
  </button>
  348. </div>
  349. <div class="mt-2 mb-1 text-xs text-gray-400 dark:text-gray-500">
  350. {$i18n.t(`Open WebUI uses faster-whisper internally.`)}
  351. <a
  352. class=" hover:underline dark:text-gray-200 text-gray-800"
  353. href="https://github.com/SYSTRAN/faster-whisper"
  354. target="_blank"
  355. >
  356. {$i18n.t(
  357. `Click here to learn more about faster-whisper and see the available models.`
  358. )}
  359. </a>
  360. </div>
  361. </div>
  362. {/if}
  363. </div>
  364. <hr class="border-gray-100 dark:border-gray-850" />
  365. <div>
  366. <div class=" mb-1 text-sm font-medium">{$i18n.t('TTS Settings')}</div>
  367. <div class=" py-0.5 flex w-full justify-between">
  368. <div class=" self-center text-xs font-medium">{$i18n.t('Text-to-Speech Engine')}</div>
  369. <div class="flex items-center relative">
  370. <select
  371. class=" dark:bg-gray-900 w-fit pr-8 cursor-pointer rounded-sm px-2 p-1 text-xs bg-transparent outline-hidden text-right"
  372. bind:value={TTS_ENGINE}
  373. placeholder="Select a mode"
  374. on:change={async (e) => {
  375. await updateConfigHandler();
  376. await getVoices();
  377. await getModels();
  378. if (e.target?.value === 'openai') {
  379. TTS_VOICE = 'alloy';
  380. TTS_MODEL = 'tts-1';
  381. } else {
  382. TTS_VOICE = '';
  383. TTS_MODEL = '';
  384. }
  385. }}
  386. >
  387. <option value="">{$i18n.t('Web API')}</option>
  388. <option value="transformers">{$i18n.t('Transformers')} ({$i18n.t('Local')})</option>
  389. <option value="openai">{$i18n.t('OpenAI')}</option>
  390. <option value="elevenlabs">{$i18n.t('ElevenLabs')}</option>
  391. <option value="azure">{$i18n.t('Azure AI Speech')}</option>
  392. </select>
  393. </div>
  394. </div>
  395. {#if TTS_ENGINE === 'openai'}
  396. <div>
  397. <div class="mt-1 flex gap-2 mb-1">
  398. <input
  399. class="flex-1 w-full bg-transparent outline-hidden"
  400. placeholder={$i18n.t('API Base URL')}
  401. bind:value={TTS_OPENAI_API_BASE_URL}
  402. required
  403. />
  404. <SensitiveInput placeholder={$i18n.t('API Key')} bind:value={TTS_OPENAI_API_KEY} />
  405. </div>
  406. </div>
  407. {:else if TTS_ENGINE === 'elevenlabs'}
  408. <div>
  409. <div class="mt-1 flex gap-2 mb-1">
  410. <input
  411. class="flex-1 w-full rounded-lg py-2 pl-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
  412. placeholder={$i18n.t('API Key')}
  413. bind:value={TTS_API_KEY}
  414. required
  415. />
  416. </div>
  417. </div>
  418. {:else if TTS_ENGINE === 'azure'}
  419. <div>
  420. <div class="mt-1 flex gap-2 mb-1">
  421. <input
  422. class="flex-1 w-full rounded-lg py-2 pl-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
  423. placeholder={$i18n.t('API Key')}
  424. bind:value={TTS_API_KEY}
  425. required
  426. />
  427. <input
  428. class="flex-1 w-full rounded-lg py-2 pl-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
  429. placeholder={$i18n.t('Azure Region')}
  430. bind:value={TTS_AZURE_SPEECH_REGION}
  431. required
  432. />
  433. </div>
  434. </div>
  435. {/if}
  436. <hr class="border-gray-100 dark:border-gray-850 my-2" />
  437. {#if TTS_ENGINE === ''}
  438. <div>
  439. <div class=" mb-1.5 text-sm font-medium">{$i18n.t('TTS Voice')}</div>
  440. <div class="flex w-full">
  441. <div class="flex-1">
  <select
    class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
    bind:value={TTS_VOICE}
  >
    <!-- "Default" stands for the empty voice, so it is the selected option exactly when no voice is set. -->
    <option value="" selected={TTS_VOICE === ''}>{$i18n.t('Default')}</option>
    {#each voices as voice}
      <option
        value={voice.voiceURI}
        class="bg-gray-100 dark:bg-gray-700"
        selected={TTS_VOICE === voice.voiceURI}>{voice.name}</option
      >
    {/each}
  </select>
  455. </div>
  456. </div>
  457. </div>
  458. {:else if TTS_ENGINE === 'transformers'}
  459. <div>
  460. <div class=" mb-1.5 text-sm font-medium">{$i18n.t('TTS Model')}</div>
  461. <div class="flex w-full">
  462. <div class="flex-1">
  463. <input
  464. list="model-list"
  465. class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
  466. bind:value={TTS_MODEL}
  467. placeholder="CMU ARCTIC speaker embedding name"
  468. />
  469. <datalist id="model-list">
  470. <option value="tts-1" />
  471. </datalist>
  472. </div>
  473. </div>
  474. <div class="mt-2 mb-1 text-xs text-gray-400 dark:text-gray-500">
  475. {$i18n.t(`Open WebUI uses SpeechT5 and CMU Arctic speaker embeddings.`)}
  476. To learn more about SpeechT5,
  477. <a
  478. class=" hover:underline dark:text-gray-200 text-gray-800"
  479. href="https://github.com/microsoft/SpeechT5"
  480. target="_blank"
  481. >
  482. {$i18n.t(`click here`, {
  483. name: 'SpeechT5'
  484. })}.
  485. </a>
  486. To see the available CMU Arctic speaker embeddings,
  487. <a
  488. class=" hover:underline dark:text-gray-200 text-gray-800"
  489. href="https://huggingface.co/datasets/Matthijs/cmu-arctic-xvectors"
  490. target="_blank"
  491. >
  492. {$i18n.t(`click here`)}.
  493. </a>
  494. </div>
  495. </div>
  496. {:else if TTS_ENGINE === 'openai'}
  497. <div class=" flex gap-2">
  498. <div class="w-full">
  499. <div class=" mb-1.5 text-sm font-medium">{$i18n.t('TTS Voice')}</div>
  500. <div class="flex w-full">
  501. <div class="flex-1">
  502. <input
  503. list="voice-list"
  504. class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
  505. bind:value={TTS_VOICE}
  506. placeholder="Select a voice"
  507. />
  508. <datalist id="voice-list">
  509. {#each voices as voice}
  510. <option value={voice.id}>{voice.name}</option>
  511. {/each}
  512. </datalist>
  513. </div>
  514. </div>
  515. </div>
  516. <div class="w-full">
  517. <div class=" mb-1.5 text-sm font-medium">{$i18n.t('TTS Model')}</div>
  518. <div class="flex w-full">
  519. <div class="flex-1">
  520. <input
  521. list="tts-model-list"
  522. class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
  523. bind:value={TTS_MODEL}
  524. placeholder="Select a model"
  525. />
  526. <datalist id="tts-model-list">
  527. {#each models as model}
  528. <option value={model.id} class="bg-gray-50 dark:bg-gray-700" />
  529. {/each}
  530. </datalist>
  531. </div>
  532. </div>
  533. </div>
  534. </div>
  535. {:else if TTS_ENGINE === 'elevenlabs'}
  536. <div class=" flex gap-2">
  537. <div class="w-full">
  538. <div class=" mb-1.5 text-sm font-medium">{$i18n.t('TTS Voice')}</div>
  539. <div class="flex w-full">
  540. <div class="flex-1">
  541. <input
  542. list="voice-list"
  543. class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
  544. bind:value={TTS_VOICE}
  545. placeholder="Select a voice"
  546. />
  547. <datalist id="voice-list">
  548. {#each voices as voice}
  549. <option value={voice.id}>{voice.name}</option>
  550. {/each}
  551. </datalist>
  552. </div>
  553. </div>
  554. </div>
  555. <div class="w-full">
  556. <div class=" mb-1.5 text-sm font-medium">{$i18n.t('TTS Model')}</div>
  557. <div class="flex w-full">
  558. <div class="flex-1">
  559. <input
  560. list="tts-model-list"
  561. class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
  562. bind:value={TTS_MODEL}
  563. placeholder="Select a model"
  564. />
  565. <datalist id="tts-model-list">
  566. {#each models as model}
  567. <option value={model.id} class="bg-gray-50 dark:bg-gray-700" />
  568. {/each}
  569. </datalist>
  570. </div>
  571. </div>
  572. </div>
  573. </div>
  574. {:else if TTS_ENGINE === 'azure'}
  575. <div class=" flex gap-2">
  576. <div class="w-full">
  577. <div class=" mb-1.5 text-sm font-medium">{$i18n.t('TTS Voice')}</div>
  578. <div class="flex w-full">
  579. <div class="flex-1">
  580. <input
  581. list="voice-list"
  582. class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
  583. bind:value={TTS_VOICE}
  584. placeholder="Select a voice"
  585. />
  586. <datalist id="voice-list">
  587. {#each voices as voice}
  588. <option value={voice.id}>{voice.name}</option>
  589. {/each}
  590. </datalist>
  591. </div>
  592. </div>
  593. </div>
  594. <div class="w-full">
  595. <div class=" mb-1.5 text-sm font-medium">
  596. {$i18n.t('Output format')}
  597. <a
  598. href="https://learn.microsoft.com/en-us/azure/ai-services/speech-service/rest-text-to-speech?tabs=streaming#audio-outputs"
  599. target="_blank"
  600. >
  601. <small>{$i18n.t('Available list')}</small>
  602. </a>
  603. </div>
  604. <div class="flex w-full">
  605. <div class="flex-1">
  606. <input
  607. list="tts-model-list"
  608. class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
  609. bind:value={TTS_AZURE_SPEECH_OUTPUT_FORMAT}
  610. placeholder="Select a output format"
  611. />
  612. </div>
  613. </div>
  614. </div>
  615. </div>
  616. {/if}
  617. <hr class="border-gray-100 dark:border-gray-850 my-2" />
  618. <div class="pt-0.5 flex w-full justify-between">
  619. <div class="self-center text-xs font-medium">{$i18n.t('Response splitting')}</div>
  620. <div class="flex items-center relative">
  621. <select
  622. class="dark:bg-gray-900 w-fit pr-8 cursor-pointer rounded-sm px-2 p-1 text-xs bg-transparent outline-hidden text-right"
  623. aria-label="Select how to split message text for TTS requests"
  624. bind:value={TTS_SPLIT_ON}
  625. >
  626. {#each Object.values(TTS_RESPONSE_SPLIT) as split}
  627. <option value={split}
  628. >{$i18n.t(split.charAt(0).toUpperCase() + split.slice(1))}</option
  629. >
  630. {/each}
  631. </select>
  632. </div>
  633. </div>
  634. <div class="mt-2 mb-1 text-xs text-gray-400 dark:text-gray-500">
  635. {$i18n.t(
  636. "Control how message text is split for TTS requests. 'Punctuation' splits into sentences, 'paragraphs' splits into paragraphs, and 'none' keeps the message as a single string."
  637. )}
  638. </div>
  639. </div>
  640. </div>
  641. </div>
  642. <div class="flex justify-end text-sm font-medium">
  643. <button
  644. class="px-3.5 py-1.5 text-sm font-medium bg-black hover:bg-gray-900 text-white dark:bg-white dark:text-black dark:hover:bg-gray-100 transition rounded-full"
  645. type="submit"
  646. >
  647. {$i18n.t('Save')}
  648. </button>
  649. </div>
  650. </form>