|
|
@@ -15,7 +15,15 @@
|
|
|
import { getChatById } from '$lib/apis/chats';
|
|
|
import { generateTags } from '$lib/apis';
|
|
|
|
|
|
- import { config, models, settings, temporaryChatEnabled, TTSWorker, user } from '$lib/stores';
|
|
|
+ import {
|
|
|
+ audioQueue,
|
|
|
+ config,
|
|
|
+ models,
|
|
|
+ settings,
|
|
|
+ temporaryChatEnabled,
|
|
|
+ TTSWorker,
|
|
|
+ user
|
|
|
+ } from '$lib/stores';
|
|
|
import { synthesizeOpenAISpeech } from '$lib/apis/audio';
|
|
|
import { imageGenerations } from '$lib/apis/images';
|
|
|
import {
|
|
|
@@ -156,7 +164,6 @@
|
|
|
|
|
|
let messageIndexEdit = false;
|
|
|
|
|
|
- let audioParts: Record<number, HTMLAudioElement | null> = {};
|
|
|
let speaking = false;
|
|
|
let speakingIdx: number | undefined;
|
|
|
|
|
|
@@ -178,51 +185,25 @@
|
|
|
}
|
|
|
};
|
|
|
|
|
|
- const playAudio = (idx: number) => {
|
|
|
- return new Promise<void>((res) => {
|
|
|
- speakingIdx = idx;
|
|
|
- const audio = audioParts[idx];
|
|
|
-
|
|
|
- if (!audio) {
|
|
|
- return res();
|
|
|
- }
|
|
|
-
|
|
|
- audio.play();
|
|
|
- audio.onended = async () => {
|
|
|
- await new Promise((r) => setTimeout(r, 300));
|
|
|
+ const stopAudio = () => {
|
|
|
+ try {
|
|
|
+ speechSynthesis.cancel();
|
|
|
+ $audioQueue.stop();
|
|
|
+ } catch {}
|
|
|
|
|
|
- if (Object.keys(audioParts).length - 1 === idx) {
|
|
|
- speaking = false;
|
|
|
- }
|
|
|
-
|
|
|
- res();
|
|
|
- };
|
|
|
- });
|
|
|
- };
|
|
|
-
|
|
|
- const toggleSpeakMessage = async () => {
|
|
|
if (speaking) {
|
|
|
- try {
|
|
|
- speechSynthesis.cancel();
|
|
|
-
|
|
|
- if (speakingIdx !== undefined && audioParts[speakingIdx]) {
|
|
|
- audioParts[speakingIdx]!.pause();
|
|
|
- audioParts[speakingIdx]!.currentTime = 0;
|
|
|
- }
|
|
|
- } catch {}
|
|
|
-
|
|
|
speaking = false;
|
|
|
speakingIdx = undefined;
|
|
|
- return;
|
|
|
}
|
|
|
+ };
|
|
|
|
|
|
+ const speak = async () => {
|
|
|
if (!(message?.content ?? '').trim().length) {
|
|
|
toast.info($i18n.t('No content to speak'));
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
speaking = true;
|
|
|
-
|
|
|
const content = removeAllDetails(message.content);
|
|
|
|
|
|
if ($config.audio.tts.engine === '') {
|
|
|
@@ -241,12 +222,12 @@
|
|
|
|
|
|
console.log(voice);
|
|
|
|
|
|
- const speak = new SpeechSynthesisUtterance(content);
|
|
|
- speak.rate = $settings.audio?.tts?.playbackRate ?? 1;
|
|
|
+ const speech = new SpeechSynthesisUtterance(content);
|
|
|
+ speech.rate = $settings.audio?.tts?.playbackRate ?? 1;
|
|
|
|
|
|
- console.log(speak);
|
|
|
+ console.log(speech);
|
|
|
|
|
|
- speak.onend = () => {
|
|
|
+ speech.onend = () => {
|
|
|
speaking = false;
|
|
|
if ($settings.conversationMode) {
|
|
|
document.getElementById('voice-input-button')?.click();
|
|
|
@@ -254,15 +235,21 @@
|
|
|
};
|
|
|
|
|
|
if (voice) {
|
|
|
- speak.voice = voice;
|
|
|
+ speech.voice = voice;
|
|
|
}
|
|
|
|
|
|
- speechSynthesis.speak(speak);
|
|
|
+ speechSynthesis.speak(speech);
|
|
|
}
|
|
|
}, 100);
|
|
|
} else {
|
|
|
- loadingSpeech = true;
|
|
|
+ $audioQueue.setId(`${message.id}`);
|
|
|
+ $audioQueue.setPlaybackRate($settings.audio?.tts?.playbackRate ?? 1);
|
|
|
+ $audioQueue.onStopped = () => {
|
|
|
+ speaking = false;
|
|
|
+ speakingIdx = undefined;
|
|
|
+ };
|
|
|
|
|
|
+ loadingSpeech = true;
|
|
|
const messageContentParts: string[] = getMessageContentParts(
|
|
|
content,
|
|
|
$config?.audio?.tts?.split_on ?? 'punctuation'
|
|
|
@@ -278,17 +265,6 @@
|
|
|
}
|
|
|
|
|
|
console.debug('Prepared message content for TTS', messageContentParts);
|
|
|
-
|
|
|
- audioParts = messageContentParts.reduce(
|
|
|
- (acc, _sentence, idx) => {
|
|
|
- acc[idx] = null;
|
|
|
- return acc;
|
|
|
- },
|
|
|
- {} as typeof audioParts
|
|
|
- );
|
|
|
-
|
|
|
- let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately
|
|
|
-
|
|
|
if ($settings.audio?.tts?.engine === 'browser-kokoro') {
|
|
|
if (!$TTSWorker) {
|
|
|
await TTSWorker.set(
|
|
|
@@ -315,12 +291,9 @@
|
|
|
});
|
|
|
|
|
|
if (blob) {
|
|
|
- const audio = new Audio(blob);
|
|
|
- audio.playbackRate = $settings.audio?.tts?.playbackRate ?? 1;
|
|
|
-
|
|
|
- audioParts[idx] = audio;
|
|
|
+ const url = URL.createObjectURL(blob);
|
|
|
+ $audioQueue.enqueue(url);
|
|
|
loadingSpeech = false;
|
|
|
- lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
|
|
|
}
|
|
|
}
|
|
|
} else {
|
|
|
@@ -341,13 +314,10 @@
|
|
|
|
|
|
if (res) {
|
|
|
const blob = await res.blob();
|
|
|
- const blobUrl = URL.createObjectURL(blob);
|
|
|
- const audio = new Audio(blobUrl);
|
|
|
- audio.playbackRate = $settings.audio?.tts?.playbackRate ?? 1;
|
|
|
+ const url = URL.createObjectURL(blob);
|
|
|
|
|
|
- audioParts[idx] = audio;
|
|
|
+ $audioQueue.enqueue(url);
|
|
|
loadingSpeech = false;
|
|
|
- lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
@@ -992,7 +962,11 @@
|
|
|
: 'invisible group-hover:visible'} p-1.5 hover:bg-black/5 dark:hover:bg-white/5 rounded-lg dark:hover:text-white hover:text-black transition"
|
|
|
on:click={() => {
|
|
|
if (!loadingSpeech) {
|
|
|
- toggleSpeakMessage();
|
|
|
+ if (speaking) {
|
|
|
+ stopAudio();
|
|
|
+ } else {
|
|
|
+ speak();
|
|
|
+ }
|
|
|
}
|
|
|
}}
|
|
|
>
|