Audio.svelte 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410
  1. <script lang="ts">
  2. import { toast } from 'svelte-sonner';
  3. import { createEventDispatcher, onMount, getContext } from 'svelte';
  4. const dispatch = createEventDispatcher();
  5. import { getBackendConfig } from '$lib/apis';
  6. import {
  7. getAudioConfig,
  8. updateAudioConfig,
  9. getModels as _getModels,
  10. getVoices as _getVoices
  11. } from '$lib/apis/audio';
  12. import { config } from '$lib/stores';
  13. import SensitiveInput from '$lib/components/common/SensitiveInput.svelte';
  14. import { TTS_RESPONSE_SPLIT } from '$lib/types';
  15. import type { Writable } from 'svelte/store';
  16. import type { i18n as i18nType } from 'i18next';
  // i18next instance, read from the Svelte context under the 'i18n' key as a store.
  const i18n = getContext<Writable<i18nType>>('i18n');

  // Callback supplied by the parent; called once updateAudioConfig returns a truthy result.
  export let saveHandler: () => void;

  // ---- Speech-to-text form state (replaced by the stored config in onMount) ----
  let STT_ENGINE = '';
  let STT_MODEL = '';
  let STT_OPENAI_API_BASE_URL = '';
  let STT_OPENAI_API_KEY = '';

  // ---- Text-to-speech form state (replaced by the stored config in onMount) ----
  let TTS_ENGINE = '';
  let TTS_MODEL = '';
  let TTS_VOICE = '';
  let TTS_OPENAI_API_BASE_URL = '';
  let TTS_OPENAI_API_KEY = '';
  let TTS_API_KEY = '';
  let TTS_SPLIT_ON: TTS_RESPONSE_SPLIT = TTS_RESPONSE_SPLIT.PUNCTUATION;

  // Voice choices offered in the form: filled from speechSynthesis.getVoices() for the
  // '' engine and from the `voices` field of the audio API response for every other engine.
  // eslint-disable-next-line no-undef
  let voices: SpeechSynthesisVoice[] = [];

  // TTS model choices; typed after the `models` field of the audio API's getModels response.
  let models: Awaited<ReturnType<typeof _getModels>>['models'] = [];
  /**
   * Refresh `models` for the currently selected TTS engine.
   *
   * When `TTS_ENGINE` is '' the list is simply emptied; for any other engine it is
   * fetched from the audio API. If the request rejects, an error toast is shown and
   * `models` keeps its previous contents.
   */
  const getModels = async () => {
    if (TTS_ENGINE === '') {
      models = [];
      return;
    }

    const res = await _getModels(localStorage.token).catch((e: unknown) => {
      // A rejection value is not guaranteed to be a string, so convert it to
      // displayable text before handing it to the toast.
      const message =
        typeof e === 'string' ? e : e instanceof Error ? e.message : JSON.stringify(e) ?? String(e);
      toast.error(message);
    });

    // `res` is undefined when the catch handler above ran.
    if (res) {
      models = res.models;
    }
  };
  // Handle of the browser-voice poller started by getVoices(), if one is still running.
  let voicesPollTimer: ReturnType<typeof setInterval> | undefined;

  /**
   * Refresh `voices` for the currently selected TTS engine.
   *
   * - `TTS_ENGINE === ''`: read the browser's speech-synthesis voices. Browsers may
   *   report an empty list until the voices have loaded, so the list is re-read every
   *   100 ms, for at most 5 seconds, until it is non-empty.
   * - any other engine: fetch the list from the audio API; on rejection an error
   *   toast is shown and `voices` keeps its previous contents.
   */
  const getVoices = async () => {
    // Stop a poller left over from an earlier call. Without this, repeated calls stack
    // pollers, and a stale one can overwrite a server-provided list after the engine
    // has been switched.
    if (voicesPollTimer !== undefined) {
      clearInterval(voicesPollTimer);
      voicesPollTimer = undefined;
    }

    if (TTS_ENGINE === '') {
      // Environments without the Web Speech API have nothing to offer.
      if (typeof speechSynthesis === 'undefined') {
        voices = [];
        return;
      }

      voices = speechSynthesis.getVoices();
      if (voices.length > 0) {
        return;
      }

      // Bounded retry: 50 attempts x 100 ms, so polling cannot run forever on a
      // browser that never reports any voice.
      let attemptsLeft = 50;
      const timer = setInterval(() => {
        voices = speechSynthesis.getVoices();
        attemptsLeft -= 1;

        if (voices.length > 0 || attemptsLeft <= 0) {
          clearInterval(timer);
          if (voicesPollTimer === timer) {
            voicesPollTimer = undefined;
          }
        }
      }, 100);
      voicesPollTimer = timer;
      return;
    }

    const res = await _getVoices(localStorage.token).catch((e: unknown) => {
      // A rejection value is not guaranteed to be a string, so convert it to
      // displayable text before handing it to the toast.
      const message =
        typeof e === 'string' ? e : e instanceof Error ? e.message : JSON.stringify(e) ?? String(e);
      toast.error(message);
    });

    // `res` is undefined when the catch handler above ran.
    if (res) {
      voices = res.voices;
    }
  };
  /**
   * Send the current form state to the audio config API.
   *
   * When the API returns a truthy result, `saveHandler` is called and the backend
   * config is re-fetched into the `config` store. Nothing else happens on a falsy result.
   */
  const updateConfigHandler = async () => {
    const ttsSettings = {
      OPENAI_API_BASE_URL: TTS_OPENAI_API_BASE_URL,
      OPENAI_API_KEY: TTS_OPENAI_API_KEY,
      API_KEY: TTS_API_KEY,
      ENGINE: TTS_ENGINE,
      MODEL: TTS_MODEL,
      VOICE: TTS_VOICE,
      SPLIT_ON: TTS_SPLIT_ON
    };
    const sttSettings = {
      OPENAI_API_BASE_URL: STT_OPENAI_API_BASE_URL,
      OPENAI_API_KEY: STT_OPENAI_API_KEY,
      ENGINE: STT_ENGINE,
      MODEL: STT_MODEL
    };

    const saved = await updateAudioConfig(localStorage.token, {
      tts: ttsSettings,
      stt: sttSettings
    });
    if (!saved) {
      return;
    }

    saveHandler();

    // Fire-and-forget refresh of the shared config store; a failed refresh is ignored.
    getBackendConfig()
      .then(config.set)
      .catch(() => {});
  };
  // On mount: copy the stored audio configuration into the form state, then load the
  // voice and model lists for whichever TTS engine is now selected.
  onMount(async () => {
    const storedConfig = await getAudioConfig(localStorage.token);

    if (storedConfig) {
      console.log(storedConfig);

      const { tts, stt } = storedConfig;

      TTS_OPENAI_API_BASE_URL = tts.OPENAI_API_BASE_URL;
      TTS_OPENAI_API_KEY = tts.OPENAI_API_KEY;
      TTS_API_KEY = tts.API_KEY;
      TTS_ENGINE = tts.ENGINE;
      TTS_MODEL = tts.MODEL;
      TTS_VOICE = tts.VOICE;
      // A falsy stored split mode falls back to punctuation splitting.
      TTS_SPLIT_ON = tts.SPLIT_ON || TTS_RESPONSE_SPLIT.PUNCTUATION;

      STT_OPENAI_API_BASE_URL = stt.OPENAI_API_BASE_URL;
      STT_OPENAI_API_KEY = stt.OPENAI_API_KEY;
      STT_ENGINE = stt.ENGINE;
      STT_MODEL = stt.MODEL;
    }

    // Both loaders branch on TTS_ENGINE, so they run after the config has been applied.
    await getVoices();
    await getModels();
  });
  110. </script>
  <!--
    Audio settings form: speech-to-text (STT) and text-to-speech (TTS) configuration.
    Submitting runs updateConfigHandler() and then dispatches a `save` event.
  -->
  <form
    class="flex flex-col h-full justify-between space-y-3 text-sm"
    on:submit|preventDefault={async () => {
      await updateConfigHandler();
      dispatch('save');
    }}
  >
    <div class=" space-y-3 overflow-y-scroll scrollbar-hidden h-full">
      <div class="flex flex-col gap-3">
        <!-- STT settings -->
        <div>
          <div class=" mb-1 text-sm font-medium">{$i18n.t('STT Settings')}</div>

          <div class=" py-0.5 flex w-full justify-between">
            <div class=" self-center text-xs font-medium">{$i18n.t('Speech-to-Text Engine')}</div>
            <div class="flex items-center relative">
              <select
                class="dark:bg-gray-900 cursor-pointer w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
                bind:value={STT_ENGINE}
                placeholder="Select an engine"
              >
                <option value="">{$i18n.t('Whisper (Local)')}</option>
                <option value="openai">OpenAI</option>
                <option value="web">{$i18n.t('Web API')}</option>
              </select>
            </div>
          </div>

          <!-- Only the OpenAI engine exposes connection and model fields. -->
          {#if STT_ENGINE === 'openai'}
            <div>
              <div class="mt-1 flex gap-2 mb-1">
                <input
                  class="flex-1 w-full rounded-lg py-2 pl-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
                  placeholder={$i18n.t('API Base URL')}
                  bind:value={STT_OPENAI_API_BASE_URL}
                  required
                />

                <SensitiveInput placeholder={$i18n.t('API Key')} bind:value={STT_OPENAI_API_KEY} />
              </div>
            </div>

            <hr class=" dark:border-gray-850 my-2" />

            <div>
              <div class=" mb-1.5 text-sm font-medium">{$i18n.t('STT Model')}</div>
              <div class="flex w-full">
                <div class="flex-1">
                  <input
                    list="model-list"
                    class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
                    bind:value={STT_MODEL}
                    placeholder={$i18n.t('Select a model')}
                  />

                  <datalist id="model-list">
                    <option value="whisper-1" />
                  </datalist>
                </div>
              </div>
            </div>
          {/if}
        </div>

        <hr class=" dark:border-gray-800" />

        <!-- TTS settings -->
        <div>
          <div class=" mb-1 text-sm font-medium">{$i18n.t('TTS Settings')}</div>

          <div class=" py-0.5 flex w-full justify-between">
            <div class=" self-center text-xs font-medium">{$i18n.t('Text-to-Speech Engine')}</div>
            <div class="flex items-center relative">
              <select
                class=" dark:bg-gray-900 w-fit pr-8 cursor-pointer rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
                bind:value={TTS_ENGINE}
                placeholder="Select a mode"
                on:change={async (e) => {
                  // Read the chosen engine before the first await: currentTarget is only
                  // set while the event is being dispatched. Unlike `target`, it is typed
                  // as the <select> element, so `.value` type-checks.
                  const engine = e.currentTarget.value;

                  // Store the new engine first, then reload the lists.
                  // NOTE(review): presumably the audio API lists voices/models for the
                  // stored engine, hence this order; confirm against the backend.
                  await updateConfigHandler();
                  await getVoices();
                  await getModels();

                  // Reset voice/model to the defaults of the new engine. These values are
                  // not stored until the form is submitted.
                  if (engine === 'openai') {
                    TTS_VOICE = 'alloy';
                    TTS_MODEL = 'tts-1';
                  } else {
                    TTS_VOICE = '';
                    TTS_MODEL = '';
                  }
                }}
              >
                <option value="">{$i18n.t('Web API')}</option>
                <option value="openai">{$i18n.t('OpenAI')}</option>
                <option value="elevenlabs">{$i18n.t('ElevenLabs')}</option>
                <option value="azurespeechservice">{$i18n.t('Azure Speech service')}</option>
              </select>
            </div>
          </div>

          <!-- Credentials: OpenAI takes a base URL and key; ElevenLabs and Azure take a single API key. -->
          {#if TTS_ENGINE === 'openai'}
            <div>
              <div class="mt-1 flex gap-2 mb-1">
                <input
                  class="flex-1 w-full rounded-lg py-2 pl-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
                  placeholder={$i18n.t('API Base URL')}
                  bind:value={TTS_OPENAI_API_BASE_URL}
                  required
                />

                <SensitiveInput placeholder={$i18n.t('API Key')} bind:value={TTS_OPENAI_API_KEY} />
              </div>
            </div>
          {:else if TTS_ENGINE === 'elevenlabs' || TTS_ENGINE === 'azurespeechservice'}
            <div>
              <div class="mt-1 flex gap-2 mb-1">
                <input
                  class="flex-1 w-full rounded-lg py-2 pl-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
                  placeholder={$i18n.t('API Key')}
                  bind:value={TTS_API_KEY}
                  required
                />
              </div>
            </div>
          {/if}

          <hr class=" dark:border-gray-850 my-2" />

          <!-- Voice/model pickers: a plain voice select for the '' engine, free-text
               inputs with suggestions for OpenAI and ElevenLabs. -->
          {#if TTS_ENGINE === ''}
            <div>
              <div class=" mb-1.5 text-sm font-medium">{$i18n.t('TTS Voice')}</div>
              <div class="flex w-full">
                <div class="flex-1">
                  <select
                    class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
                    bind:value={TTS_VOICE}
                  >
                    <!-- "Default" stands for the empty voice, so it is the selected entry
                         exactly when no voice is configured. -->
                    <option value="" selected={TTS_VOICE === ''}>{$i18n.t('Default')}</option>
                    {#each voices as voice}
                      <option
                        value={voice.voiceURI}
                        class="bg-gray-100 dark:bg-gray-700"
                        selected={TTS_VOICE === voice.voiceURI}>{voice.name}</option
                      >
                    {/each}
                  </select>
                </div>
              </div>
            </div>
          {:else if TTS_ENGINE === 'openai' || TTS_ENGINE === 'elevenlabs'}
            <div class=" flex gap-2">
              <div class="w-full">
                <div class=" mb-1.5 text-sm font-medium">{$i18n.t('TTS Voice')}</div>
                <div class="flex w-full">
                  <div class="flex-1">
                    <input
                      list="voice-list"
                      class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
                      bind:value={TTS_VOICE}
                      placeholder={$i18n.t('Select a voice')}
                    />

                    <datalist id="voice-list">
                      {#each voices as voice}
                        <option value={voice.id}>{voice.name}</option>
                      {/each}
                    </datalist>
                  </div>
                </div>
              </div>

              <div class="w-full">
                <div class=" mb-1.5 text-sm font-medium">{$i18n.t('TTS Model')}</div>
                <div class="flex w-full">
                  <div class="flex-1">
                    <input
                      list="tts-model-list"
                      class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
                      bind:value={TTS_MODEL}
                      placeholder={$i18n.t('Select a model')}
                    />

                    <datalist id="tts-model-list">
                      {#each models as model}
                        <option value={model.id} />
                      {/each}
                    </datalist>
                  </div>
                </div>
              </div>
            </div>
          {/if}

          <hr class="dark:border-gray-850 my-2" />

          <!-- One option per TTS_RESPONSE_SPLIT value; the label is the capitalised value. -->
          <div class="pt-0.5 flex w-full justify-between">
            <div class="self-center text-xs font-medium">{$i18n.t('Response splitting')}</div>
            <div class="flex items-center relative">
              <select
                class="dark:bg-gray-900 w-fit pr-8 cursor-pointer rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
                aria-label="Select how to split message text for TTS requests"
                bind:value={TTS_SPLIT_ON}
              >
                {#each Object.values(TTS_RESPONSE_SPLIT) as split}
                  <option value={split}
                    >{$i18n.t(split.charAt(0).toUpperCase() + split.slice(1))}</option
                  >
                {/each}
              </select>
            </div>
          </div>

          <div class="mt-2 mb-1 text-xs text-gray-400 dark:text-gray-500">
            {$i18n.t(
              "Control how message text is split for TTS requests. 'Punctuation' splits into sentences, 'paragraphs' splits into paragraphs, and 'none' keeps the message as a single string."
            )}
          </div>
        </div>
      </div>
    </div>

    <div class="flex justify-end text-sm font-medium">
      <button
        class=" px-4 py-2 bg-emerald-700 hover:bg-emerald-800 text-gray-100 transition rounded-lg"
        type="submit"
      >
        {$i18n.t('Save')}
      </button>
    </div>
  </form>