Browse Source

enh/refac: read aloud audio queue

Timothy Jaeryang Baek 3 months ago
parent
commit
1cc3493dc8

+ 17 - 1
src/lib/components/chat/Chat.svelte

@@ -27,6 +27,7 @@
 		banners,
 		user,
 		socket,
+		audioQueue,
 		showControls,
 		showCallOverlay,
 		currentChatPage,
@@ -43,6 +44,7 @@
 		pinnedChats,
 		showEmbeds
 	} from '$lib/stores';
+
 	import {
 		convertMessagesToHistory,
 		copyToClipboard,
@@ -53,6 +55,8 @@
 		removeAllDetails,
 		getCodeBlockContents
 	} from '$lib/utils';
+	import { AudioQueue } from '$lib/utils/audio';
+
 	import {
 		createNewChat,
 		getAllTags,
@@ -529,17 +533,28 @@
 	let showControlsSubscribe = null;
 	let selectedFolderSubscribe = null;
 
+	const stopAudio = () => {
+		try {
+			speechSynthesis.cancel();
+			$audioQueue.stop();
+		} catch {}
+	};
+
 	onMount(async () => {
 		loading = true;
 		console.log('mounted');
 		window.addEventListener('message', onMessageHandler);
 		$socket?.on('events', chatEventHandler);
 
+		audioQueue.set(new AudioQueue(document.getElementById('audioElement')));
+
 		pageSubscribe = page.subscribe(async (p) => {
 			if (p.url.pathname === '/') {
 				await tick();
 				initNewChat();
 			}
+
+			stopAudio();
 		});
 
 		const storageChatInput = sessionStorage.getItem(
@@ -621,6 +636,7 @@
 			chatIdUnsubscriber?.();
 			window.removeEventListener('message', onMessageHandler);
 			$socket?.off('events', chatEventHandler);
+			$audioQueue?.destroy();
 		} catch (e) {
 			console.error(e);
 		}
@@ -2347,7 +2363,7 @@
 	</title>
 </svelte:head>
 
-<audio id="audioElement" src="" style="display: none;" />
+<audio id="audioElement" src="" style="display: none;"></audio>
 
 <EventConfirmDialog
 	bind:show={showEventConfirmation}

+ 38 - 64
src/lib/components/chat/Messages/ResponseMessage.svelte

@@ -15,7 +15,15 @@
 	import { getChatById } from '$lib/apis/chats';
 	import { generateTags } from '$lib/apis';
 
-	import { config, models, settings, temporaryChatEnabled, TTSWorker, user } from '$lib/stores';
+	import {
+		audioQueue,
+		config,
+		models,
+		settings,
+		temporaryChatEnabled,
+		TTSWorker,
+		user
+	} from '$lib/stores';
 	import { synthesizeOpenAISpeech } from '$lib/apis/audio';
 	import { imageGenerations } from '$lib/apis/images';
 	import {
@@ -156,7 +164,6 @@
 
 	let messageIndexEdit = false;
 
-	let audioParts: Record<number, HTMLAudioElement | null> = {};
 	let speaking = false;
 	let speakingIdx: number | undefined;
 
@@ -178,51 +185,25 @@
 		}
 	};
 
-	const playAudio = (idx: number) => {
-		return new Promise<void>((res) => {
-			speakingIdx = idx;
-			const audio = audioParts[idx];
-
-			if (!audio) {
-				return res();
-			}
-
-			audio.play();
-			audio.onended = async () => {
-				await new Promise((r) => setTimeout(r, 300));
+	const stopAudio = () => {
+		try {
+			speechSynthesis.cancel();
+			$audioQueue.stop();
+		} catch {}
 
-				if (Object.keys(audioParts).length - 1 === idx) {
-					speaking = false;
-				}
-
-				res();
-			};
-		});
-	};
-
-	const toggleSpeakMessage = async () => {
 		if (speaking) {
-			try {
-				speechSynthesis.cancel();
-
-				if (speakingIdx !== undefined && audioParts[speakingIdx]) {
-					audioParts[speakingIdx]!.pause();
-					audioParts[speakingIdx]!.currentTime = 0;
-				}
-			} catch {}
-
 			speaking = false;
 			speakingIdx = undefined;
-			return;
 		}
+	};
 
+	const speak = async () => {
 		if (!(message?.content ?? '').trim().length) {
 			toast.info($i18n.t('No content to speak'));
 			return;
 		}
 
 		speaking = true;
-
 		const content = removeAllDetails(message.content);
 
 		if ($config.audio.tts.engine === '') {
@@ -241,12 +222,12 @@
 
 					console.log(voice);
 
-					const speak = new SpeechSynthesisUtterance(content);
-					speak.rate = $settings.audio?.tts?.playbackRate ?? 1;
+					const speech = new SpeechSynthesisUtterance(content);
+					speech.rate = $settings.audio?.tts?.playbackRate ?? 1;
 
-					console.log(speak);
+					console.log(speech);
 
-					speak.onend = () => {
+					speech.onend = () => {
 						speaking = false;
 						if ($settings.conversationMode) {
 							document.getElementById('voice-input-button')?.click();
@@ -254,15 +235,21 @@
 					};
 
 					if (voice) {
-						speak.voice = voice;
+						speech.voice = voice;
 					}
 
-					speechSynthesis.speak(speak);
+					speechSynthesis.speak(speech);
 				}
 			}, 100);
 		} else {
-			loadingSpeech = true;
+			$audioQueue.setId(`${message.id}`);
+			$audioQueue.setPlaybackRate($settings.audio?.tts?.playbackRate ?? 1);
+			$audioQueue.onStopped = () => {
+				speaking = false;
+				speakingIdx = undefined;
+			};
 
+			loadingSpeech = true;
 			const messageContentParts: string[] = getMessageContentParts(
 				content,
 				$config?.audio?.tts?.split_on ?? 'punctuation'
@@ -278,17 +265,6 @@
 			}
 
 			console.debug('Prepared message content for TTS', messageContentParts);
-
-			audioParts = messageContentParts.reduce(
-				(acc, _sentence, idx) => {
-					acc[idx] = null;
-					return acc;
-				},
-				{} as typeof audioParts
-			);
-
-			let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately
-
 			if ($settings.audio?.tts?.engine === 'browser-kokoro') {
 				if (!$TTSWorker) {
 					await TTSWorker.set(
@@ -315,12 +291,9 @@
 						});
 
 					if (blob) {
-						const audio = new Audio(blob);
-						audio.playbackRate = $settings.audio?.tts?.playbackRate ?? 1;
-
-						audioParts[idx] = audio;
+						const url = URL.createObjectURL(blob);
+						$audioQueue.enqueue(url);
 						loadingSpeech = false;
-						lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
 					}
 				}
 			} else {
@@ -341,13 +314,10 @@
 
 					if (res) {
 						const blob = await res.blob();
-						const blobUrl = URL.createObjectURL(blob);
-						const audio = new Audio(blobUrl);
-						audio.playbackRate = $settings.audio?.tts?.playbackRate ?? 1;
+						const url = URL.createObjectURL(blob);
 
-						audioParts[idx] = audio;
+						$audioQueue.enqueue(url);
 						loadingSpeech = false;
-						lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
 					}
 				}
 			}
@@ -992,7 +962,11 @@
 												: 'invisible group-hover:visible'} p-1.5 hover:bg-black/5 dark:hover:bg-white/5 rounded-lg dark:hover:text-white hover:text-black transition"
 											on:click={() => {
 												if (!loadingSpeech) {
-													toggleSpeakMessage();
+													if (speaking) {
+														stopAudio();
+													} else {
+														speak();
+													}
 												}
 											}}
 										>

+ 2 - 0
src/lib/stores/index.ts

@@ -68,6 +68,8 @@ export const banners: Writable<Banner[]> = writable([]);
 
 export const settings: Writable<Settings> = writable({});
 
+export const audioQueue = writable(null);
+
 export const showSidebar = writable(false);
 export const showSearch = writable(false);
 export const showSettings = writable(false);

+ 73 - 0
src/lib/utils/audio.ts

@@ -0,0 +1,73 @@
/** Payload delivered to {@link AudioQueue.onStopped}. */
export type AudioQueueStopEvent = {
	/**
	 * `stop` is emitted by every call to `stop()`; `id-change` follows it when
	 * `setId()` switches owner, and `empty-queue` follows it when the last
	 * queued clip has finished.
	 */
	event: 'stop' | 'id-change' | 'empty-queue';
	/** Owner id associated with the notification. */
	id: string | null;
};

/**
 * Plays a FIFO list of audio URLs, one after another, through a single
 * `<audio>` element.
 *
 * The queue never revokes the URLs it is given; callers that enqueue `blob:`
 * URLs remain responsible for `URL.revokeObjectURL`.
 */
export class AudioQueue {
	/** Element used for playback; `null` once `destroy()` has run. */
	audio: HTMLAudioElement | null;
	/** URLs waiting to be played, in order. */
	queue: string[] = [];
	/** URL currently loaded into the element, or `null` when idle. */
	current: string | null = null;
	/** Identifier of whoever currently owns the queue (set via `setId`). */
	id: string | null = null;
	/** Rate last requested through `setPlaybackRate`. */
	playbackRate = 1;
	/** Optional callback invoked whenever playback stops (see {@link AudioQueueStopEvent}). */
	onStopped: ((event: AudioQueueStopEvent) => void) | null = null;
	/** Bound `ended` listener, kept so `destroy()` can detach it. */
	readonly _onEnded: () => void;

	/** Incremented on every clip change/stop so late `play()` rejections can be ignored. */
	private playToken = 0;

	/**
	 * @param audioElement The `<audio>` element to drive.
	 * @throws TypeError when no element is supplied.
	 */
	constructor(audioElement: HTMLElement | null) {
		if (!audioElement) {
			throw new TypeError('AudioQueue requires an <audio> element');
		}

		// DOM lookups are typed as HTMLElement; the caller guarantees an <audio> node.
		this.audio = audioElement as HTMLAudioElement;

		this._onEnded = () => this.next();
		this.audio.addEventListener('ended', this._onEnded);
	}

	/**
	 * Switches the owner of the queue. When the id actually changes, playback is
	 * stopped first and `onStopped` is notified with `stop` and then `id-change`.
	 */
	setId(newId: string | null): void {
		console.log('Setting audio queue ID to:', newId);
		if (this.id !== newId) {
			this.stop();
			this.id = newId;
			this.onStopped?.({ event: 'id-change', id: newId });
		}
	}

	/** Sets the playback rate for the current clip and for every clip that follows. */
	setPlaybackRate(rate: number): void {
		console.log('Setting audio playback rate to:', rate);
		this.playbackRate = rate;
		if (this.audio) {
			this.audio.playbackRate = rate;
		}
	}

	/** Appends a URL and starts playing it immediately if the queue is idle. */
	enqueue(url: string): void {
		console.log('Enqueuing audio URL:', url);
		if (!this.audio) {
			return; // destroyed: nothing can be played any more
		}

		this.queue.push(url);

		// Auto-play if nothing is currently playing or loaded
		if (this.audio.paused && !this.current) {
			this.next();
		}
	}

	/** Starts the first queued clip when idle, otherwise resumes the element. */
	play(): void {
		if (!this.current && this.queue.length > 0) {
			this.next();
		} else {
			this.startPlayback();
		}
	}

	/**
	 * Advances to the next queued URL. When the queue is exhausted the element is
	 * stopped and `onStopped` is notified with `stop` and then `empty-queue`.
	 */
	next(): void {
		const audio = this.audio;
		if (!audio) {
			return;
		}

		this.current = this.queue.shift() ?? null;
		if (this.current) {
			this.playToken++;
			audio.src = this.current;
			// Assigning `src` reloads the element, which resets playbackRate to the
			// default rate, so the requested rate has to be re-applied per clip.
			audio.playbackRate = this.playbackRate;
			this.startPlayback();
			console.log('Playing audio URL:', this.current);
		} else {
			this.stop();
			this.onStopped?.({ event: 'empty-queue', id: this.id });
		}
	}

	/** Halts playback, clears the queue and notifies `onStopped` with `stop`. */
	stop(): void {
		const audio = this.audio;
		if (!audio) {
			return;
		}

		this.playToken++; // invalidate any play() promise still in flight
		audio.pause();
		audio.currentTime = 0;
		audio.src = '';
		this.queue = [];
		this.current = null;
		this.onStopped?.({ event: 'stop', id: this.id });
	}

	/**
	 * Detaches from the element and releases it. Safe to call more than once;
	 * every other method becomes a no-op afterwards.
	 */
	destroy(): void {
		if (!this.audio) {
			return;
		}

		this.audio.removeEventListener('ended', this._onEnded);
		this.stop();
		this.onStopped = null;
		this.audio = null;
	}

	/**
	 * Calls `play()` on the element and handles its promise so a rejection
	 * neither surfaces as an unhandled rejection nor leaves the queue stalled.
	 */
	private startPlayback(): void {
		const audio = this.audio;
		if (!audio) {
			return;
		}

		const token = this.playToken;
		// Legacy engines return undefined instead of a promise.
		const pending: Promise<void> | undefined = audio.play();
		pending?.catch((error: unknown) => {
			// AbortError is the normal outcome of pausing / swapping the source
			// while a play request is pending.
			if (error instanceof Error && error.name === 'AbortError') {
				return;
			}

			console.error('Audio playback failed:', error);

			// Skip the failed clip, unless the queue has already moved on.
			if (token === this.playToken && this.current) {
				this.next();
			}
		});
	}
}