CallOverlay.svelte

<script lang="ts">
	import { getContext, onDestroy, onMount, tick, createEventDispatcher } from 'svelte';
	import { toast } from 'svelte-sonner';

	import { config, models, settings, showCallOverlay, TTSWorker } from '$lib/stores';
	import { blobToFile } from '$lib/utils';
	import { generateEmoji } from '$lib/apis';
	import { synthesizeOpenAISpeech, transcribeAudio } from '$lib/apis/audio';

	import Tooltip from '$lib/components/common/Tooltip.svelte';
	import VideoInputMenu from './CallOverlay/VideoInputMenu.svelte';
	import { KokoroWorker } from '$lib/workers/KokoroWorker';

	const dispatch = createEventDispatcher();
	const i18n = getContext('i18n');

	export let eventTarget: EventTarget;
	export let submitPrompt: Function;
	export let stopResponse: Function;
	export let files;
	export let chatId;
	export let modelId;

	let wakeLock = null;

	let model = null;
	let loading = false;
	let confirmed = false;
	let interrupted = false;
	let assistantSpeaking = false;

	let emoji = null;
	let camera = false;
	let cameraStream = null;

	let chatStreaming = false;
	let rmsLevel = 0;
	let hasStartedSpeaking = false;
	let mediaRecorder;
	let audioStream = null;
	let audioChunks = [];

	let videoInputDevices = [];
	let selectedVideoInputDeviceId = null;
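
	// --- Camera / screen-share handling --------------------------------------
	// Enumerates the available video inputs and, where the browser supports
	// getDisplayMedia, appends a synthetic "Screen Share" entry so the same
	// device picker can drive both webcam capture and screen capture.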
	const getVideoInputDevices = async () => {
		const devices = await navigator.mediaDevices.enumerateDevices();
		videoInputDevices = devices.filter((device) => device.kind === 'videoinput');

		if (navigator.mediaDevices.getDisplayMedia) {
			videoInputDevices = [
				...videoInputDevices,
				{
					deviceId: 'screen',
					label: 'Screen Share'
				}
			];
		}

		console.log(videoInputDevices);

		if (selectedVideoInputDeviceId === null && videoInputDevices.length > 0) {
			selectedVideoInputDeviceId = videoInputDevices[0].deviceId;
		}
	};

	const startCamera = async () => {
		await getVideoInputDevices();

		if (cameraStream === null) {
			camera = true;
			await tick();

			try {
				await startVideoStream();
			} catch (err) {
				console.error('Error accessing webcam: ', err);
			}
		}
	};

	const startVideoStream = async () => {
		const video = document.getElementById('camera-feed');
		if (video) {
			if (selectedVideoInputDeviceId === 'screen') {
				cameraStream = await navigator.mediaDevices.getDisplayMedia({
					video: {
						cursor: 'always'
					},
					audio: false
				});
			} else {
				cameraStream = await navigator.mediaDevices.getUserMedia({
					video: {
						deviceId: selectedVideoInputDeviceId ? { exact: selectedVideoInputDeviceId } : undefined
					}
				});
			}

			if (cameraStream) {
				await getVideoInputDevices();
				video.srcObject = cameraStream;
				await video.play();
			}
		}
	};

	const stopVideoStream = async () => {
		if (cameraStream) {
			const tracks = cameraStream.getTracks();
			tracks.forEach((track) => track.stop());
		}

		cameraStream = null;
	};

	const takeScreenshot = () => {
		const video = document.getElementById('camera-feed');
		const canvas = document.getElementById('camera-canvas');

		if (!video || !canvas) {
			return;
		}

		const context = canvas.getContext('2d');

		// Match the canvas to the current video dimensions
		canvas.width = video.videoWidth;
		canvas.height = video.videoHeight;

		// Draw the current video frame onto the canvas
		context.drawImage(video, 0, 0, video.videoWidth, video.videoHeight);

		// Convert the canvas to a base64 data URL
		const dataURL = canvas.toDataURL('image/png');
		console.log(dataURL);
		return dataURL;
	};

	const stopCamera = async () => {
		await stopVideoStream();
		camera = false;
	};

	const MIN_DECIBELS = -55;
	const VISUALIZER_BUFFER_LENGTH = 300;
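
	// --- Speech-to-text pipeline ----------------------------------------------
	// Microphone audio is chunked by a MediaRecorder while an AnalyserNode
	// watches for speech. Two seconds of silence after speech stops the
	// recorder, and the captured chunks are transcribed and submitted as a
	// prompt.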
	const transcribeHandler = async (audioBlob) => {
		// Wrap the recorded audio in a File and send it for transcription
		await tick();
		const file = blobToFile(audioBlob, 'recording.wav');

		const res = await transcribeAudio(localStorage.token, file).catch((error) => {
			toast.error(`${error}`);
			return null;
		});

		if (res) {
			console.log(res.text);

			if (res.text !== '') {
				const _responses = await submitPrompt(res.text, { _raw: true });
				console.log(_responses);
			}
		}
	};

	const stopRecordingCallback = async (_continue = true) => {
		if ($showCallOverlay) {
			console.log('%c%s', 'color: red; font-size: 20px;', '🚨 stopRecordingCallback 🚨');

			// Shallow copy the audio chunks before clearing the buffer
			const _audioChunks = audioChunks.slice(0);

			audioChunks = [];
			mediaRecorder = false;

			if (_continue) {
				startRecording();
			}

			if (confirmed) {
				loading = true;
				emoji = null;

				if (cameraStream) {
					const imageUrl = takeScreenshot();

					files = [
						{
							type: 'image',
							url: imageUrl
						}
					];
				}

				const audioBlob = new Blob(_audioChunks, { type: 'audio/wav' });
				await transcribeHandler(audioBlob);

				confirmed = false;
				loading = false;
			}
		} else {
			audioChunks = [];
			mediaRecorder = false;

			if (audioStream) {
				const tracks = audioStream.getTracks();
				tracks.forEach((track) => track.stop());
			}
			audioStream = null;
		}
	};

	const startRecording = async () => {
		if ($showCallOverlay) {
			if (!audioStream) {
				audioStream = await navigator.mediaDevices.getUserMedia({
					audio: {
						echoCancellation: true,
						noiseSuppression: true,
						autoGainControl: true
					}
				});
			}
			mediaRecorder = new MediaRecorder(audioStream);

			mediaRecorder.onstart = () => {
				console.log('Recording started');
				audioChunks = [];
				analyseAudio(audioStream);
			};

			mediaRecorder.ondataavailable = (event) => {
				// Only buffer audio once speech has actually been detected
				if (hasStartedSpeaking) {
					audioChunks.push(event.data);
				}
			};

			mediaRecorder.onstop = (e) => {
				console.log('Recording stopped', audioStream, e);
				stopRecordingCallback();
			};

			mediaRecorder.start();
		}
	};

	const stopAudioStream = async () => {
		try {
			if (mediaRecorder) {
				mediaRecorder.stop();
			}
		} catch (error) {
			console.log('Error stopping audio stream:', error);
		}

		if (!audioStream) return;

		audioStream.getAudioTracks().forEach(function (track) {
			track.stop();
		});

		audioStream = null;
	};

	// Calculate the RMS level from time-domain data
	const calculateRMS = (data: Uint8Array) => {
		let sumSquares = 0;
		for (let i = 0; i < data.length; i++) {
			const normalizedValue = (data[i] - 128) / 128; // Center on 0 and normalize to [-1, 1]
			sumSquares += normalizedValue * normalizedValue;
		}
		return Math.sqrt(sumSquares / data.length);
	};
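
	// --- Voice activity detection ---------------------------------------------
	// Polls the analyser once per animation frame: frequency data drives the
	// speech/silence state machine, while time-domain data feeds the RMS level
	// used to scale the avatar. While the assistant is speaking (and voice
	// interruption is disabled), the analyser thresholds are narrowed so the
	// assistant's own output is not picked up as user speech.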
	const analyseAudio = (stream) => {
		const audioContext = new AudioContext();
		const audioStreamSource = audioContext.createMediaStreamSource(stream);

		const analyser = audioContext.createAnalyser();
		analyser.minDecibels = MIN_DECIBELS;
		audioStreamSource.connect(analyser);

		const bufferLength = analyser.frequencyBinCount;
		const domainData = new Uint8Array(bufferLength);
		const timeDomainData = new Uint8Array(analyser.fftSize);

		let lastSoundTime = Date.now();
		hasStartedSpeaking = false;

		console.log('🔊 Sound detection started', lastSoundTime, hasStartedSpeaking);

		const detectSound = () => {
			const processFrame = () => {
				if (!mediaRecorder || !$showCallOverlay) {
					return;
				}

				if (assistantSpeaking && !($settings?.voiceInterruption ?? false)) {
					// Effectively mute sound detection while the assistant is speaking
					analyser.maxDecibels = 0;
					analyser.minDecibels = -1;
				} else {
					analyser.minDecibels = MIN_DECIBELS;
					analyser.maxDecibels = -30;
				}

				analyser.getByteTimeDomainData(timeDomainData);
				analyser.getByteFrequencyData(domainData);

				// Calculate RMS level from time-domain data
				rmsLevel = calculateRMS(timeDomainData);

				// Check if initial speech/noise has started
				const hasSound = domainData.some((value) => value > 0);
				if (hasSound) {
					console.log('%c%s', 'color: red; font-size: 20px;', '🔊 Sound detected');

					if (!hasStartedSpeaking) {
						hasStartedSpeaking = true;
						stopAllAudio();
					}
					lastSoundTime = Date.now();
				}

				// Start silence detection only after initial speech/noise has been detected
				if (hasStartedSpeaking) {
					if (Date.now() - lastSoundTime > 2000) {
						confirmed = true;

						if (mediaRecorder) {
							console.log('%c%s', 'color: red; font-size: 20px;', '🔇 Silence detected');
							mediaRecorder.stop();
							return;
						}
					}
				}

				window.requestAnimationFrame(processFrame);
			};

			window.requestAnimationFrame(processFrame);
		};

		detectSound();
	};

	let finishedMessages = {};
	let currentMessageId = null;
	let currentUtterance = null;
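
	// --- Text-to-speech playback ------------------------------------------------
	// speakSpeechSynthesisHandler uses the browser's built-in SpeechSynthesis
	// API; playAudio plays pre-synthesized audio through the shared
	// #audioElement. Both resolve once playback has finished so sentences can
	// be played strictly in order.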
	const speakSpeechSynthesisHandler = (content) => {
		if ($showCallOverlay) {
			return new Promise((resolve) => {
				let voices = [];
				// getVoices() may return an empty list until the voice list has
				// loaded, so poll until it is populated
				const getVoicesLoop = setInterval(() => {
					voices = speechSynthesis.getVoices();
					if (voices.length > 0) {
						clearInterval(getVoicesLoop);

						const voice =
							voices
								?.filter(
									(v) => v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
								)
								?.at(0) ?? undefined;

						currentUtterance = new SpeechSynthesisUtterance(content);
						currentUtterance.rate = $settings?.audio?.tts?.playbackRate ?? 1;

						if (voice) {
							currentUtterance.voice = voice;
						}

						speechSynthesis.speak(currentUtterance);

						currentUtterance.onend = async (e) => {
							await new Promise((r) => setTimeout(r, 200));
							resolve(e);
						};
					}
				}, 100);
			});
		} else {
			return Promise.resolve();
		}
	};

	const playAudio = (audio) => {
		if ($showCallOverlay) {
			return new Promise((resolve) => {
				const audioElement = document.getElementById('audioElement') as HTMLAudioElement;

				if (audioElement) {
					audioElement.src = audio.src;
					// Start muted so autoplay is not blocked, then unmute once
					// playback has actually started
					audioElement.muted = true;
					audioElement.playbackRate = $settings?.audio?.tts?.playbackRate ?? 1;

					audioElement
						.play()
						.then(() => {
							audioElement.muted = false;
						})
						.catch((error) => {
							console.error(error);
						});

					audioElement.onended = async (e) => {
						await new Promise((r) => setTimeout(r, 100));
						resolve(e);
					};
				}
			});
		} else {
			return Promise.resolve();
		}
	};

	const stopAllAudio = async () => {
		assistantSpeaking = false;
		interrupted = true;

		if (chatStreaming) {
			stopResponse();
		}

		if (currentUtterance) {
			speechSynthesis.cancel();
			currentUtterance = null;
		}

		const audioElement = document.getElementById('audioElement') as HTMLAudioElement;
		if (audioElement) {
			audioElement.muted = true;
			audioElement.pause();
			audioElement.currentTime = 0;
		}
	};

	let audioAbortController = new AbortController();

	// Audio cache map where the key is the sentence and the value is the Audio object
	const audioCache = new Map();
	const emojiCache = new Map();
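
	// Synthesize (and cache) audio for a sentence as soon as it arrives, so
	// playback does not have to wait on the TTS engine. Falls back to a `true`
	// sentinel when no TTS engine is configured, in which case the browser's
	// SpeechSynthesis API is used at playback time.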
	const fetchAudio = async (content) => {
		if (!audioCache.has(content)) {
			try {
				// Generate an emoji for the sentence if enabled
				if ($settings?.showEmojiInCall ?? false) {
					const generatedEmoji = await generateEmoji(localStorage.token, modelId, content, chatId);
					if (generatedEmoji) {
						emojiCache.set(content, generatedEmoji);
					}
				}

				if ($settings?.audio?.tts?.engine === 'browser-kokoro') {
					const blob = await $TTSWorker
						.generate({
							text: content,
							voice: $settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice
						})
						.catch((error) => {
							console.error(error);
							toast.error(`${error}`);
						});

					if (blob) {
						audioCache.set(content, new Audio(blob));
					}
				} else if ($config.audio.tts.engine !== '') {
					const res = await synthesizeOpenAISpeech(
						localStorage.token,
						$settings?.audio?.tts?.defaultVoice === $config.audio.tts.voice
							? ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
							: $config?.audio?.tts?.voice,
						content
					).catch((error) => {
						console.error(error);
						return null;
					});

					if (res) {
						const blob = await res.blob();
						const blobUrl = URL.createObjectURL(blob);
						audioCache.set(content, new Audio(blobUrl));
					}
				} else {
					// No TTS engine configured; mark as ready for SpeechSynthesis playback
					audioCache.set(content, true);
				}
			} catch (error) {
				console.error('Error synthesizing speech:', error);
			}
		}

		return audioCache.get(content);
	};

	let messages = {};
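
	// Consume the sentence queue for a message: play each sentence as soon as
	// its audio is cached, re-queue it briefly if synthesis has not finished,
	// and stop once the message is marked finished or the abort signal fires.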
	const monitorAndPlayAudio = async (id, signal) => {
		while (!signal.aborted) {
			if (messages[id] && messages[id].length > 0) {
				// Dequeue the next sentence to play
				const content = messages[id].shift();

				if (audioCache.has(content)) {
					// Set the emoji for the sentence if one was generated
					if (($settings?.showEmojiInCall ?? false) && emojiCache.has(content)) {
						emoji = emojiCache.get(content);
					} else {
						emoji = null;
					}

					if ($config.audio.tts.engine !== '') {
						try {
							console.log(
								'%c%s',
								'color: red; font-size: 20px;',
								`Playing audio for content: ${content}`
							);

							const audio = audioCache.get(content);
							await playAudio(audio);
							console.log(`Played audio for content: ${content}`);
							await new Promise((resolve) => setTimeout(resolve, 200)); // Brief pause between sentences
						} catch (error) {
							console.error('Error playing audio:', error);
						}
					} else {
						await speakSpeechSynthesisHandler(content);
					}
				} else {
					// Audio not cached yet; re-queue the sentence and wait
					messages[id].unshift(content);
					console.log(`Audio for "${content}" not yet available in the cache, re-queued...`);
					await new Promise((resolve) => setTimeout(resolve, 200)); // Wait before retrying to avoid a tight loop
				}
			} else if (finishedMessages[id] && messages[id] && messages[id].length === 0) {
				// The message is finished and fully played; stop monitoring
				assistantSpeaking = false;
				break;
			} else {
				// Nothing to play yet; sleep briefly
				await new Promise((resolve) => setTimeout(resolve, 200));
			}
		}
		console.log(`Audio monitoring and playing stopped for message ID ${id}`);
	};
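
	// --- Chat event handlers ----------------------------------------------------
	// The chat emits "chat:start" once per assistant message, "chat" once per
	// streamed sentence, and "chat:finish" when the message is complete.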
	const chatStartHandler = async (e) => {
		const { id } = e.detail;

		chatStreaming = true;

		if (currentMessageId !== id) {
			console.log(`Received chat start event for message ID ${id}`);
			currentMessageId = id;

			// Abort any audio still playing for the previous message
			if (audioAbortController) {
				audioAbortController.abort();
			}
			audioAbortController = new AbortController();

			assistantSpeaking = true;
			// Start monitoring and playing audio for this message ID
			monitorAndPlayAudio(id, audioAbortController.signal);
		}
	};

	const chatEventHandler = async (e) => {
		const { id, content } = e.detail;
		// "id" is the message ID; ignore events for any other message.
		// "content" is a single sentence from the assistant; a message
		// produces many of these events.
		if (currentMessageId === id) {
			console.log(`Received chat event for message ID ${id}: ${content}`);

			try {
				if (messages[id] === undefined) {
					messages[id] = [content];
				} else {
					messages[id].push(content);
				}

				console.log(content);
				fetchAudio(content);
			} catch (error) {
				console.error('Failed to fetch or play audio:', error);
			}
		}
	};

	const chatFinishHandler = async (e) => {
		const { id, content } = e.detail;
		// "content" here is the entire message from the assistant
		finishedMessages[id] = true;

		chatStreaming = false;
	};
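
	// --- Lifecycle ---------------------------------------------------------------
	// On mount: keep the screen awake where the Wake Lock API is available,
	// start listening, and subscribe to chat events. All teardown lives in
	// onDestroy, since a cleanup function returned from an async onMount
	// callback is never invoked by Svelte.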
	onMount(async () => {
		const setWakeLock = async () => {
			try {
				wakeLock = await navigator.wakeLock.request('screen');
			} catch (err) {
				// The Wake Lock request failed - usually system related, such as low battery
				console.log(err);
			}

			if (wakeLock) {
				// Log when the wake lock is released
				wakeLock.addEventListener('release', () => {
					console.log('Wake Lock released');
				});
			}
		};

		if ('wakeLock' in navigator) {
			await setWakeLock();

			document.addEventListener('visibilitychange', async () => {
				// Re-request the wake lock if the document becomes visible
				if (wakeLock !== null && document.visibilityState === 'visible') {
					await setWakeLock();
				}
			});
		}

		model = $models.find((m) => m.id === modelId);

		startRecording();

		eventTarget.addEventListener('chat:start', chatStartHandler);
		eventTarget.addEventListener('chat', chatEventHandler);
		eventTarget.addEventListener('chat:finish', chatFinishHandler);
	});

	onDestroy(async () => {
		await stopAllAudio();
		await stopRecordingCallback(false);
		await stopCamera();
		await stopAudioStream();

		eventTarget.removeEventListener('chat:start', chatStartHandler);
		eventTarget.removeEventListener('chat', chatEventHandler);
		eventTarget.removeEventListener('chat:finish', chatFinishHandler);

		audioAbortController.abort();
		await tick();

		await stopAllAudio();
	});
</script>

{#if $showCallOverlay}
	<div class="max-w-lg w-full h-full max-h-[100dvh] flex flex-col justify-between p-3 md:p-6">
		{#if camera}
			<button
				type="button"
				class="flex justify-center items-center w-full h-20 min-h-20"
				on:click={() => {
					if (assistantSpeaking) {
						stopAllAudio();
					}
				}}
			>
				{#if emoji}
					<div
						class=" transition-all rounded-full"
						style="font-size:{rmsLevel * 100 > 4
							? '4.5'
							: rmsLevel * 100 > 2
								? '4.25'
								: rmsLevel * 100 > 1
									? '3.75'
									: '3.5'}rem;width: 100%; text-align:center;"
					>
						{emoji}
					</div>
				{:else if loading || assistantSpeaking}
					<svg
						class="size-12 text-gray-900 dark:text-gray-400"
						viewBox="0 0 24 24"
						fill="currentColor"
						xmlns="http://www.w3.org/2000/svg"
						><style>
							.spinner_qM83 {
								animation: spinner_8HQG 1.05s infinite;
							}
							.spinner_oXPr {
								animation-delay: 0.1s;
							}
							.spinner_ZTLf {
								animation-delay: 0.2s;
							}
							@keyframes spinner_8HQG {
								0%,
								57.14% {
									animation-timing-function: cubic-bezier(0.33, 0.66, 0.66, 1);
									transform: translate(0);
								}
								28.57% {
									animation-timing-function: cubic-bezier(0.33, 0, 0.66, 0.33);
									transform: translateY(-6px);
								}
								100% {
									transform: translate(0);
								}
							}
						</style><circle class="spinner_qM83" cx="4" cy="12" r="3" /><circle
							class="spinner_qM83 spinner_oXPr"
							cx="12"
							cy="12"
							r="3"
						/><circle class="spinner_qM83 spinner_ZTLf" cx="20" cy="12" r="3" /></svg
					>
				{:else}
					<div
						class=" {rmsLevel * 100 > 4
							? ' size-[4.5rem]'
							: rmsLevel * 100 > 2
								? ' size-16'
								: rmsLevel * 100 > 1
									? 'size-14'
									: 'size-12'} transition-all rounded-full {(model?.info?.meta
							?.profile_image_url ?? '/static/favicon.png') !== '/static/favicon.png'
							? ' bg-cover bg-center bg-no-repeat'
							: 'bg-black dark:bg-white'}"
						style={(model?.info?.meta?.profile_image_url ?? '/static/favicon.png') !==
						'/static/favicon.png'
							? `background-image: url('${model?.info?.meta?.profile_image_url}');`
							: ''}
					/>
				{/if}
				<!-- navbar -->
			</button>
		{/if}

		<div class="flex justify-center items-center flex-1 h-full w-full max-h-full">
			{#if !camera}
				<button
					type="button"
					on:click={() => {
						if (assistantSpeaking) {
							stopAllAudio();
						}
					}}
				>
					{#if emoji}
						<div
							class=" transition-all rounded-full"
							style="font-size:{rmsLevel * 100 > 4
								? '13'
								: rmsLevel * 100 > 2
									? '12'
									: rmsLevel * 100 > 1
										? '11.5'
										: '11'}rem;width:100%;text-align:center;"
						>
							{emoji}
						</div>
					{:else if loading || assistantSpeaking}
						<svg
							class="size-44 text-gray-900 dark:text-gray-400"
							viewBox="0 0 24 24"
							fill="currentColor"
							xmlns="http://www.w3.org/2000/svg"
							><style>
								.spinner_qM83 {
									animation: spinner_8HQG 1.05s infinite;
								}
								.spinner_oXPr {
									animation-delay: 0.1s;
								}
								.spinner_ZTLf {
									animation-delay: 0.2s;
								}
								@keyframes spinner_8HQG {
									0%,
									57.14% {
										animation-timing-function: cubic-bezier(0.33, 0.66, 0.66, 1);
										transform: translate(0);
									}
									28.57% {
										animation-timing-function: cubic-bezier(0.33, 0, 0.66, 0.33);
										transform: translateY(-6px);
									}
									100% {
										transform: translate(0);
									}
								}
							</style><circle class="spinner_qM83" cx="4" cy="12" r="3" /><circle
								class="spinner_qM83 spinner_oXPr"
								cx="12"
								cy="12"
								r="3"
							/><circle class="spinner_qM83 spinner_ZTLf" cx="20" cy="12" r="3" /></svg
						>
					{:else}
						<div
							class=" {rmsLevel * 100 > 4
								? ' size-52'
								: rmsLevel * 100 > 2
									? 'size-48'
									: rmsLevel * 100 > 1
										? 'size-44'
										: 'size-40'} transition-all rounded-full {(model?.info?.meta
								?.profile_image_url ?? '/static/favicon.png') !== '/static/favicon.png'
								? ' bg-cover bg-center bg-no-repeat'
								: 'bg-black dark:bg-white'} "
							style={(model?.info?.meta?.profile_image_url ?? '/static/favicon.png') !==
							'/static/favicon.png'
								? `background-image: url('${model?.info?.meta?.profile_image_url}');`
								: ''}
						/>
					{/if}
				</button>
			{:else}
				<div class="relative flex video-container w-full max-h-full pt-2 pb-4 md:py-6 px-2 h-full">
					<video
						id="camera-feed"
						autoplay
						class="rounded-2xl h-full min-w-full object-cover object-center"
						playsinline
					/>

					<canvas id="camera-canvas" style="display:none;" />

					<div class=" absolute top-4 md:top-8 left-4">
						<button
							type="button"
							class="p-1.5 text-white cursor-pointer backdrop-blur-xl bg-black/10 rounded-full"
							on:click={() => {
								stopCamera();
							}}
						>
							<svg
								xmlns="http://www.w3.org/2000/svg"
								viewBox="0 0 16 16"
								fill="currentColor"
								class="size-6"
							>
								<path
									d="M5.28 4.22a.75.75 0 0 0-1.06 1.06L6.94 8l-2.72 2.72a.75.75 0 1 0 1.06 1.06L8 9.06l2.72 2.72a.75.75 0 1 0 1.06-1.06L9.06 8l2.72-2.72a.75.75 0 0 0-1.06-1.06L8 6.94 5.28 4.22Z"
								/>
							</svg>
						</button>
					</div>
				</div>
			{/if}
		</div>
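
		<!-- Bottom control bar: camera / device picker, status text, end-call button -->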
		<div class="flex justify-between items-center pb-2 w-full">
			<div>
				{#if camera}
					<VideoInputMenu
						devices={videoInputDevices}
						on:change={async (e) => {
							console.log(e.detail);
							selectedVideoInputDeviceId = e.detail;
							await stopVideoStream();
							await startVideoStream();
						}}
					>
						<button class=" p-3 rounded-full bg-gray-50 dark:bg-gray-900" type="button">
							<svg
								xmlns="http://www.w3.org/2000/svg"
								viewBox="0 0 20 20"
								fill="currentColor"
								class="size-5"
							>
								<path
									fill-rule="evenodd"
									d="M15.312 11.424a5.5 5.5 0 0 1-9.201 2.466l-.312-.311h2.433a.75.75 0 0 0 0-1.5H3.989a.75.75 0 0 0-.75.75v4.242a.75.75 0 0 0 1.5 0v-2.43l.31.31a7 7 0 0 0 11.712-3.138.75.75 0 0 0-1.449-.39Zm1.23-3.723a.75.75 0 0 0 .219-.53V2.929a.75.75 0 0 0-1.5 0V5.36l-.31-.31A7 7 0 0 0 3.239 8.188a.75.75 0 1 0 1.448.389A5.5 5.5 0 0 1 13.89 6.11l.311.31h-2.432a.75.75 0 0 0 0 1.5h4.243a.75.75 0 0 0 .53-.219Z"
									clip-rule="evenodd"
								/>
							</svg>
						</button>
					</VideoInputMenu>
				{:else}
					<Tooltip content={$i18n.t('Camera')}>
						<button
							class=" p-3 rounded-full bg-gray-50 dark:bg-gray-900"
							type="button"
							on:click={async () => {
								await navigator.mediaDevices.getUserMedia({ video: true });
								startCamera();
							}}
						>
							<svg
								xmlns="http://www.w3.org/2000/svg"
								fill="none"
								viewBox="0 0 24 24"
								stroke-width="1.5"
								stroke="currentColor"
								class="size-5"
							>
								<path
									stroke-linecap="round"
									stroke-linejoin="round"
									d="M6.827 6.175A2.31 2.31 0 0 1 5.186 7.23c-.38.054-.757.112-1.134.175C2.999 7.58 2.25 8.507 2.25 9.574V18a2.25 2.25 0 0 0 2.25 2.25h15A2.25 2.25 0 0 0 21.75 18V9.574c0-1.067-.75-1.994-1.802-2.169a47.865 47.865 0 0 0-1.134-.175 2.31 2.31 0 0 1-1.64-1.055l-.822-1.316a2.192 2.192 0 0 0-1.736-1.039 48.774 48.774 0 0 0-5.232 0 2.192 2.192 0 0 0-1.736 1.039l-.821 1.316Z"
								/>
								<path
									stroke-linecap="round"
									stroke-linejoin="round"
									d="M16.5 12.75a4.5 4.5 0 1 1-9 0 4.5 4.5 0 0 1 9 0ZM18.75 10.5h.008v.008h-.008V10.5Z"
								/>
							</svg>
						</button>
					</Tooltip>
				{/if}
			</div>

			<div>
				<button
					type="button"
					on:click={() => {
						if (assistantSpeaking) {
							stopAllAudio();
						}
					}}
				>
					<div class=" line-clamp-1 text-sm font-medium">
						{#if loading}
							{$i18n.t('Thinking...')}
						{:else if assistantSpeaking}
							{$i18n.t('Tap to interrupt')}
						{:else}
							{$i18n.t('Listening...')}
						{/if}
					</div>
				</button>
			</div>

			<div>
				<button
					class=" p-3 rounded-full bg-gray-50 dark:bg-gray-900"
					on:click={async () => {
						await stopAudioStream();
						await stopVideoStream();

						console.log(audioStream);
						console.log(cameraStream);

						showCallOverlay.set(false);
						dispatch('close');
					}}
					type="button"
				>
					<svg
						xmlns="http://www.w3.org/2000/svg"
						viewBox="0 0 20 20"
						fill="currentColor"
						class="size-5"
					>
						<path
							d="M6.28 5.22a.75.75 0 0 0-1.06 1.06L8.94 10l-3.72 3.72a.75.75 0 1 0 1.06 1.06L10 11.06l3.72 3.72a.75.75 0 1 0 1.06-1.06L11.06 10l3.72-3.72a.75.75 0 0 0-1.06-1.06L10 8.94 6.28 5.22Z"
						/>
					</svg>
				</button>
			</div>
		</div>
	</div>
{/if}