<script lang="ts">
	import { config, models, settings, showCallOverlay } from '$lib/stores';
	import { onMount, tick, getContext } from 'svelte';

	import {
		blobToFile,
		calculateSHA256,
		extractSentencesForAudio,
		findWordIndices
	} from '$lib/utils';

	import { generateEmoji } from '$lib/apis';
	import { synthesizeOpenAISpeech, transcribeAudio } from '$lib/apis/audio';

	import { toast } from 'svelte-sonner';

	import Tooltip from '$lib/components/common/Tooltip.svelte';
	import VideoInputMenu from './CallOverlay/VideoInputMenu.svelte';

	const i18n = getContext('i18n');

	export let eventTarget: EventTarget;
	export let submitPrompt: Function;
	export let stopResponse: Function;
	export let files;
	export let chatId;
	export let modelId;

	let model = null;

	let loading = false;
	let confirmed = false;
	let interrupted = false;
	let assistantSpeaking = false;

	let emoji = null;
	let camera = false;
	let cameraStream = null;

	let chatStreaming = false;
	let rmsLevel = 0;
	let hasStartedSpeaking = false;
	let mediaRecorder;
	let audioChunks = [];

	let videoInputDevices = [];
	let selectedVideoInputDeviceId = null;

	const getVideoInputDevices = async () => {
		const devices = await navigator.mediaDevices.enumerateDevices();
		videoInputDevices = devices.filter((device) => device.kind === 'videoinput');

		if (!!navigator.mediaDevices.getDisplayMedia) {
			videoInputDevices = [
				...videoInputDevices,
				{
					deviceId: 'screen',
					label: 'Screen Share'
				}
			];
		}

		console.log(videoInputDevices);

		if (selectedVideoInputDeviceId === null && videoInputDevices.length > 0) {
			selectedVideoInputDeviceId = videoInputDevices[0].deviceId;
		}
	};
	const startCamera = async () => {
		await getVideoInputDevices();

		if (cameraStream === null) {
			camera = true;
			await tick();
			try {
				await startVideoStream();
			} catch (err) {
				console.error('Error accessing webcam: ', err);
			}
		}
	};

	const startVideoStream = async () => {
		const video = document.getElementById('camera-feed');
		if (video) {
			if (selectedVideoInputDeviceId === 'screen') {
				cameraStream = await navigator.mediaDevices.getDisplayMedia({
					video: {
						cursor: 'always'
					},
					audio: false
				});
			} else {
				cameraStream = await navigator.mediaDevices.getUserMedia({
					video: {
						deviceId: selectedVideoInputDeviceId ? { exact: selectedVideoInputDeviceId } : undefined
					}
				});
			}

			if (cameraStream) {
				await getVideoInputDevices();
				video.srcObject = cameraStream;
				await video.play();
			}
		}
	};

	const stopVideoStream = async () => {
		if (cameraStream) {
			const tracks = cameraStream.getTracks();
			tracks.forEach((track) => track.stop());
		}

		cameraStream = null;
	};
	const takeScreenshot = () => {
		const video = document.getElementById('camera-feed');
		const canvas = document.getElementById('camera-canvas');

		if (!video || !canvas) {
			return;
		}

		const context = canvas.getContext('2d');

		// Make the canvas match the video dimensions
		canvas.width = video.videoWidth;
		canvas.height = video.videoHeight;

		// Draw the current video frame onto the canvas
		context.drawImage(video, 0, 0, video.videoWidth, video.videoHeight);

		// Convert the canvas to a base64 data URL
		const dataURL = canvas.toDataURL('image/png');
		console.log(dataURL);
		return dataURL;
	};
	const stopCamera = async () => {
		await stopVideoStream();
		camera = false;
	};
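
	// MIN_DECIBELS is the analyser's noise floor for speech detection;
	// VISUALIZER_BUFFER_LENGTH appears reserved for a waveform visualizer.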
	const MIN_DECIBELS = -55;
	const VISUALIZER_BUFFER_LENGTH = 300;

	const transcribeHandler = async (audioBlob) => {
		// Wrap the recorded audio blob in a File and send it for transcription
		await tick();
		const file = blobToFile(audioBlob, 'recording.wav');

		const res = await transcribeAudio(localStorage.token, file).catch((error) => {
			toast.error(error);
			return null;
		});

		if (res) {
			console.log(res.text);
			if (res.text !== '') {
				const _responses = await submitPrompt(res.text, { _raw: true });
				console.log(_responses);
			}
		}
	};
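
	// Called when the MediaRecorder stops: transcribe what was captured and,
	// unless the overlay is closing, immediately start a new recording cycle.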
	const stopRecordingCallback = async (_continue = true) => {
		if ($showCallOverlay) {
			console.log('%c%s', 'color: red; font-size: 20px;', '🚨 stopRecordingCallback 🚨');

			// copy the audioChunks array before resetting it
			const _audioChunks = audioChunks.slice(0);

			audioChunks = [];
			mediaRecorder = null;

			if (_continue) {
				startRecording();
			}

			if (confirmed) {
				loading = true;
				emoji = null;

				if (cameraStream) {
					const imageUrl = takeScreenshot();

					files = [
						{
							type: 'image',
							url: imageUrl
						}
					];
				}

				const audioBlob = new Blob(_audioChunks, { type: 'audio/wav' });
				await transcribeHandler(audioBlob);

				confirmed = false;
				loading = false;
			}
		} else {
			audioChunks = [];
			mediaRecorder = null;
		}
	};
	const startRecording = async () => {
		const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
		mediaRecorder = new MediaRecorder(stream);

		mediaRecorder.onstart = () => {
			console.log('Recording started');
			audioChunks = [];
			analyseAudio(stream);
		};

		mediaRecorder.ondataavailable = (event) => {
			if (hasStartedSpeaking) {
				audioChunks.push(event.data);
			}
		};

		mediaRecorder.onstop = (e) => {
			console.log('Recording stopped', e);
			stopRecordingCallback();
		};

		mediaRecorder.start();
	};
	// Calculate the RMS level from time domain data
	const calculateRMS = (data: Uint8Array) => {
		let sumSquares = 0;
		for (let i = 0; i < data.length; i++) {
			const normalizedValue = (data[i] - 128) / 128; // Normalize the data
			sumSquares += normalizedValue * normalizedValue;
		}
		return Math.sqrt(sumSquares / data.length);
	};
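
	// Simple voice-activity detection: an AnalyserNode gates out anything below
	// MIN_DECIBELS, each animation frame checks the frequency data for sound, and
	// once speech has started, two seconds without sound stops the recorder.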
	const analyseAudio = (stream) => {
		const audioContext = new AudioContext();
		const audioStreamSource = audioContext.createMediaStreamSource(stream);

		const analyser = audioContext.createAnalyser();
		analyser.minDecibels = MIN_DECIBELS;
		audioStreamSource.connect(analyser);

		const bufferLength = analyser.frequencyBinCount;
		const domainData = new Uint8Array(bufferLength);
		const timeDomainData = new Uint8Array(analyser.fftSize);

		let lastSoundTime = Date.now();
		hasStartedSpeaking = false;

		console.log('🔊 Sound detection started', lastSoundTime, hasStartedSpeaking);

		const detectSound = () => {
			const processFrame = () => {
				if (!mediaRecorder || !$showCallOverlay) {
					return;
				}

				if (assistantSpeaking && !($settings?.voiceInterruption ?? false)) {
					// Mute the audio if the assistant is speaking
					analyser.maxDecibels = 0;
					analyser.minDecibels = -1;
				} else {
					analyser.minDecibels = MIN_DECIBELS;
					analyser.maxDecibels = -30;
				}

				analyser.getByteTimeDomainData(timeDomainData);
				analyser.getByteFrequencyData(domainData);

				// Calculate RMS level from time domain data
				rmsLevel = calculateRMS(timeDomainData);

				// Check if initial speech/noise has started
				const hasSound = domainData.some((value) => value > 0);
				if (hasSound) {
					console.log('%c%s', 'color: red; font-size: 20px;', '🔊 Sound detected');

					if (!hasStartedSpeaking) {
						hasStartedSpeaking = true;
						stopAllAudio();
					}

					lastSoundTime = Date.now();
				}

				// Start silence detection only after initial speech/noise has been detected
				if (hasStartedSpeaking) {
					if (Date.now() - lastSoundTime > 2000) {
						confirmed = true;

						if (mediaRecorder) {
							console.log('%c%s', 'color: red; font-size: 20px;', '🔇 Silence detected');
							mediaRecorder.stop();
							return;
						}
					}
				}

				window.requestAnimationFrame(processFrame);
			};

			window.requestAnimationFrame(processFrame);
		};

		detectSound();
	};

	let finishedMessages = {};
	let currentMessageId = null;
	let currentUtterance = null;
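
	// speechSynthesis.getVoices() can return an empty list until the browser has
	// loaded its voices, so poll until it is populated before speaking.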
	const speakSpeechSynthesisHandler = (content) => {
		if ($showCallOverlay) {
			return new Promise((resolve) => {
				let voices = [];
				const getVoicesLoop = setInterval(() => {
					voices = speechSynthesis.getVoices();
					if (voices.length > 0) {
						clearInterval(getVoicesLoop);

						const voice =
							voices
								?.filter(
									(v) => v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
								)
								?.at(0) ?? undefined;

						currentUtterance = new SpeechSynthesisUtterance(content);

						if (voice) {
							currentUtterance.voice = voice;
						}

						speechSynthesis.speak(currentUtterance);

						currentUtterance.onend = async (e) => {
							await new Promise((r) => setTimeout(r, 200));
							resolve(e);
						};
					}
				}, 100);
			});
		} else {
			return Promise.resolve();
		}
	};
	const playAudio = (audio) => {
		if ($showCallOverlay) {
			return new Promise((resolve) => {
				const audioElement = document.getElementById('audioElement');

				if (audioElement) {
					audioElement.src = audio.src;
					// Start muted and unmute once playback begins, which helps satisfy
					// browser autoplay policies
					audioElement.muted = true;

					audioElement
						.play()
						.then(() => {
							audioElement.muted = false;
						})
						.catch((error) => {
							console.error(error);
						});

					audioElement.onended = async (e) => {
						await new Promise((r) => setTimeout(r, 100));
						resolve(e);
					};
				}
			});
		} else {
			return Promise.resolve();
		}
	};
	const stopAllAudio = async () => {
		assistantSpeaking = false;
		interrupted = true;

		if (chatStreaming) {
			stopResponse();
		}

		if (currentUtterance) {
			speechSynthesis.cancel();
			currentUtterance = null;
		}

		const audioElement = document.getElementById('audioElement');
		if (audioElement) {
			audioElement.muted = true;
			audioElement.pause();
			audioElement.currentTime = 0;
		}
	};
	let audioAbortController = new AbortController();

	// Audio cache map where key is the content and value is the Audio object.
	const audioCache = new Map();
	const emojiCache = new Map();
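
	// Prefetch TTS audio (and optionally an emoji) for a sentence as soon as it
	// streams in, so playback is ready by the time the queue reaches it.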
	const fetchAudio = async (content) => {
		if (!audioCache.has(content)) {
			try {
				// Set the emoji for the content if needed
				if ($settings?.showEmojiInCall ?? false) {
					const emoji = await generateEmoji(localStorage.token, modelId, content, chatId);
					if (emoji) {
						emojiCache.set(content, emoji);
					}
				}

				if ($config.audio.tts.engine !== '') {
					const res = await synthesizeOpenAISpeech(
						localStorage.token,
						$settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice,
						content
					).catch((error) => {
						console.error(error);
						return null;
					});

					if (res) {
						const blob = await res.blob();
						const blobUrl = URL.createObjectURL(blob);
						audioCache.set(content, new Audio(blobUrl));
					}
				} else {
					// Browser speech synthesis needs no prefetch; just mark the content as ready
					audioCache.set(content, true);
				}
			} catch (error) {
				console.error('Error synthesizing speech:', error);
			}
		}

		return audioCache.get(content);
	};
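
	// Per-message sentence queue: chat events push sentences in, and
	// monitorAndPlayAudio drains them in order once their audio is cached.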
	let messages = {};

	const monitorAndPlayAudio = async (id, signal) => {
		while (!signal.aborted) {
			if (messages[id] && messages[id].length > 0) {
				// Retrieve the next content string from the queue
				const content = messages[id].shift(); // Dequeues the content for playing

				if (audioCache.has(content)) {
					// Set the emoji for the content if available
					if (($settings?.showEmojiInCall ?? false) && emojiCache.has(content)) {
						emoji = emojiCache.get(content);
					} else {
						emoji = null;
					}

					if ($config.audio.tts.engine !== '') {
						try {
							console.log(
								'%c%s',
								'color: red; font-size: 20px;',
								`Playing audio for content: ${content}`
							);

							const audio = audioCache.get(content);
							await playAudio(audio);
							console.log(`Played audio for content: ${content}`);

							// Brief pause between sentences
							await new Promise((resolve) => setTimeout(resolve, 200));
						} catch (error) {
							console.error('Error playing audio:', error);
						}
					} else {
						await speakSpeechSynthesisHandler(content);
					}
				} else {
					// If not available in the cache, push it back to the queue and delay
					messages[id].unshift(content); // Re-queue the content at the start
					console.log(`Audio for "${content}" not yet available in the cache, re-queued...`);
					await new Promise((resolve) => setTimeout(resolve, 200)); // Wait before retrying to avoid a tight loop
				}
			} else if (finishedMessages[id] && messages[id] && messages[id].length === 0) {
				// The message is finished and there is nothing left to play
				assistantSpeaking = false;
				break;
			} else {
				// No messages to process yet, sleep for a bit
				await new Promise((resolve) => setTimeout(resolve, 200));
			}
		}

		console.log(`Audio monitoring and playing stopped for message ID ${id}`);
	};
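
	// Wire up the chat event stream on mount: 'chat:start' begins a playback
	// session for a message, 'chat' delivers sentences, and 'chat:finish' marks
	// the message complete. The returned cleanup callback tears everything down.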
	onMount(async () => {
		model = $models.find((m) => m.id === modelId);
		startRecording();

		const chatStartHandler = async (e) => {
			const { id } = e.detail;
			chatStreaming = true;

			if (currentMessageId !== id) {
				console.log(`Received chat start event for message ID ${id}`);
				currentMessageId = id;

				if (audioAbortController) {
					audioAbortController.abort();
				}
				audioAbortController = new AbortController();

				assistantSpeaking = true;
				// Start monitoring and playing audio for the message ID
				monitorAndPlayAudio(id, audioAbortController.signal);
			}
		};

		const chatEventHandler = async (e) => {
			const { id, content } = e.detail;
			// "id" here is the message id;
			// if "id" is not the same as "currentMessageId" then do not process.
			// "content" here is a sentence from the assistant;
			// there will be many sentences for the same "id"
			if (currentMessageId === id) {
				console.log(`Received chat event for message ID ${id}: ${content}`);

				try {
					if (messages[id] === undefined) {
						messages[id] = [content];
					} else {
						messages[id].push(content);
					}

					console.log(content);
					fetchAudio(content);
				} catch (error) {
					console.error('Failed to fetch or play audio:', error);
				}
			}
		};

		const chatFinishHandler = async (e) => {
			const { id, content } = e.detail;
			// "content" here is the entire message from the assistant
			finishedMessages[id] = true;
			chatStreaming = false;
		};

		eventTarget.addEventListener('chat:start', chatStartHandler);
		eventTarget.addEventListener('chat', chatEventHandler);
		eventTarget.addEventListener('chat:finish', chatFinishHandler);

		return async () => {
			eventTarget.removeEventListener('chat:start', chatStartHandler);
			eventTarget.removeEventListener('chat', chatEventHandler);
			eventTarget.removeEventListener('chat:finish', chatFinishHandler);

			audioAbortController.abort();
			await tick();

			await stopAllAudio();
			await stopRecordingCallback(false);
			await stopCamera();
		};
	});
</script>

{#if $showCallOverlay}
	<div class=" absolute w-full h-screen max-h-[100dvh] flex z-[999] overflow-hidden">
		<div
			class="absolute w-full h-screen max-h-[100dvh] bg-white text-gray-700 dark:bg-black dark:text-gray-300 flex justify-center"
		>
			<div class="max-w-lg w-full h-screen max-h-[100dvh] flex flex-col justify-between p-3 md:p-6">
				{#if camera}
					<button
						type="button"
						class="flex justify-center items-center w-full h-20 min-h-20"
						on:click={() => {
							if (assistantSpeaking) {
								stopAllAudio();
							}
						}}
					>
						{#if emoji}
							<div
								class=" transition-all rounded-full"
								style="font-size:{rmsLevel * 100 > 4
									? '4.5'
									: rmsLevel * 100 > 2
										? '4.25'
										: rmsLevel * 100 > 1
											? '3.75'
											: '3.5'}rem;width: 100%; text-align:center;"
							>
								{emoji}
							</div>
						{:else if loading || assistantSpeaking}
							<svg
								class="size-12 text-gray-900 dark:text-gray-400"
								viewBox="0 0 24 24"
								fill="currentColor"
								xmlns="http://www.w3.org/2000/svg"
								><style>
									.spinner_qM83 {
										animation: spinner_8HQG 1.05s infinite;
									}
									.spinner_oXPr {
										animation-delay: 0.1s;
									}
									.spinner_ZTLf {
										animation-delay: 0.2s;
									}
									@keyframes spinner_8HQG {
										0%,
										57.14% {
											animation-timing-function: cubic-bezier(0.33, 0.66, 0.66, 1);
											transform: translate(0);
										}
										28.57% {
											animation-timing-function: cubic-bezier(0.33, 0, 0.66, 0.33);
											transform: translateY(-6px);
										}
										100% {
											transform: translate(0);
										}
									}
								</style><circle class="spinner_qM83" cx="4" cy="12" r="3" /><circle
									class="spinner_qM83 spinner_oXPr"
									cx="12"
									cy="12"
									r="3"
								/><circle class="spinner_qM83 spinner_ZTLf" cx="20" cy="12" r="3" /></svg
							>
						{:else}
							<div
								class=" {rmsLevel * 100 > 4
									? ' size-[4.5rem]'
									: rmsLevel * 100 > 2
										? ' size-16'
										: rmsLevel * 100 > 1
											? 'size-14'
											: 'size-12'} transition-all rounded-full {(model?.info?.meta
									?.profile_image_url ?? '/static/favicon.png') !== '/static/favicon.png'
									? ' bg-cover bg-center bg-no-repeat'
									: 'bg-black dark:bg-white'}"
								style={(model?.info?.meta?.profile_image_url ?? '/static/favicon.png') !==
								'/static/favicon.png'
									? `background-image: url('${model?.info?.meta?.profile_image_url}');`
									: ''}
							/>
						{/if}
						<!-- navbar -->
					</button>
				{/if}

				<div class="flex justify-center items-center flex-1 h-full w-full max-h-full">
					{#if !camera}
						<button
							type="button"
							on:click={() => {
								if (assistantSpeaking) {
									stopAllAudio();
								}
							}}
						>
							{#if emoji}
								<div
									class=" transition-all rounded-full"
									style="font-size:{rmsLevel * 100 > 4
										? '13'
										: rmsLevel * 100 > 2
											? '12'
											: rmsLevel * 100 > 1
												? '11.5'
												: '11'}rem;width:100%;text-align:center;"
								>
									{emoji}
								</div>
							{:else if loading || assistantSpeaking}
								<svg
									class="size-44 text-gray-900 dark:text-gray-400"
									viewBox="0 0 24 24"
									fill="currentColor"
									xmlns="http://www.w3.org/2000/svg"
									><style>
										.spinner_qM83 {
											animation: spinner_8HQG 1.05s infinite;
										}
										.spinner_oXPr {
											animation-delay: 0.1s;
										}
										.spinner_ZTLf {
											animation-delay: 0.2s;
										}
										@keyframes spinner_8HQG {
											0%,
											57.14% {
												animation-timing-function: cubic-bezier(0.33, 0.66, 0.66, 1);
												transform: translate(0);
											}
											28.57% {
												animation-timing-function: cubic-bezier(0.33, 0, 0.66, 0.33);
												transform: translateY(-6px);
											}
											100% {
												transform: translate(0);
											}
										}
									</style><circle class="spinner_qM83" cx="4" cy="12" r="3" /><circle
										class="spinner_qM83 spinner_oXPr"
										cx="12"
										cy="12"
										r="3"
									/><circle class="spinner_qM83 spinner_ZTLf" cx="20" cy="12" r="3" /></svg
								>
							{:else}
								<div
									class=" {rmsLevel * 100 > 4
										? ' size-52'
										: rmsLevel * 100 > 2
											? 'size-48'
											: rmsLevel * 100 > 1
												? 'size-[11.5rem]'
												: 'size-44'} transition-all rounded-full {(model?.info?.meta
										?.profile_image_url ?? '/static/favicon.png') !== '/static/favicon.png'
										? ' bg-cover bg-center bg-no-repeat'
										: 'bg-black dark:bg-white'} "
									style={(model?.info?.meta?.profile_image_url ?? '/static/favicon.png') !==
									'/static/favicon.png'
										? `background-image: url('${model?.info?.meta?.profile_image_url}');`
										: ''}
								/>
							{/if}
						</button>
					{:else}
						<div
							class="relative flex video-container w-full max-h-full pt-2 pb-4 md:py-6 px-2 h-full"
						>
							<video
								id="camera-feed"
								autoplay
								class="rounded-2xl h-full min-w-full object-cover object-center"
								playsinline
							/>

							<canvas id="camera-canvas" style="display:none;" />

							<div class=" absolute top-4 md:top-8 left-4">
								<button
									type="button"
									class="p-1.5 text-white cursor-pointer backdrop-blur-xl bg-black/10 rounded-full"
									on:click={() => {
										stopCamera();
									}}
								>
									<svg
										xmlns="http://www.w3.org/2000/svg"
										viewBox="0 0 16 16"
										fill="currentColor"
										class="size-6"
									>
										<path
											d="M5.28 4.22a.75.75 0 0 0-1.06 1.06L6.94 8l-2.72 2.72a.75.75 0 1 0 1.06 1.06L8 9.06l2.72 2.72a.75.75 0 1 0 1.06-1.06L9.06 8l2.72-2.72a.75.75 0 0 0-1.06-1.06L8 6.94 5.28 4.22Z"
										/>
									</svg>
								</button>
							</div>
						</div>
					{/if}
				</div>

				<div class="flex justify-between items-center pb-2 w-full">
					<div>
						{#if camera}
							<VideoInputMenu
								devices={videoInputDevices}
								on:change={async (e) => {
									console.log(e.detail);
									selectedVideoInputDeviceId = e.detail;
									await stopVideoStream();
									await startVideoStream();
								}}
							>
								<button class=" p-3 rounded-full bg-gray-50 dark:bg-gray-900" type="button">
									<svg
										xmlns="http://www.w3.org/2000/svg"
										viewBox="0 0 20 20"
										fill="currentColor"
										class="size-5"
									>
										<path
											fill-rule="evenodd"
											d="M15.312 11.424a5.5 5.5 0 0 1-9.201 2.466l-.312-.311h2.433a.75.75 0 0 0 0-1.5H3.989a.75.75 0 0 0-.75.75v4.242a.75.75 0 0 0 1.5 0v-2.43l.31.31a7 7 0 0 0 11.712-3.138.75.75 0 0 0-1.449-.39Zm1.23-3.723a.75.75 0 0 0 .219-.53V2.929a.75.75 0 0 0-1.5 0V5.36l-.31-.31A7 7 0 0 0 3.239 8.188a.75.75 0 1 0 1.448.389A5.5 5.5 0 0 1 13.89 6.11l.311.31h-2.432a.75.75 0 0 0 0 1.5h4.243a.75.75 0 0 0 .53-.219Z"
											clip-rule="evenodd"
										/>
									</svg>
								</button>
							</VideoInputMenu>
						{:else}
							<Tooltip content={$i18n.t('Camera')}>
								<button
									class=" p-3 rounded-full bg-gray-50 dark:bg-gray-900"
									type="button"
									on:click={async () => {
										// Prompt for camera permission before opening the camera view
										await navigator.mediaDevices.getUserMedia({ video: true });
										startCamera();
									}}
								>
									<svg
										xmlns="http://www.w3.org/2000/svg"
										fill="none"
										viewBox="0 0 24 24"
										stroke-width="1.5"
										stroke="currentColor"
										class="size-5"
									>
										<path
											stroke-linecap="round"
											stroke-linejoin="round"
											d="M6.827 6.175A2.31 2.31 0 0 1 5.186 7.23c-.38.054-.757.112-1.134.175C2.999 7.58 2.25 8.507 2.25 9.574V18a2.25 2.25 0 0 0 2.25 2.25h15A2.25 2.25 0 0 0 21.75 18V9.574c0-1.067-.75-1.994-1.802-2.169a47.865 47.865 0 0 0-1.134-.175 2.31 2.31 0 0 1-1.64-1.055l-.822-1.316a2.192 2.192 0 0 0-1.736-1.039 48.774 48.774 0 0 0-5.232 0 2.192 2.192 0 0 0-1.736 1.039l-.821 1.316Z"
										/>
										<path
											stroke-linecap="round"
											stroke-linejoin="round"
											d="M16.5 12.75a4.5 4.5 0 1 1-9 0 4.5 4.5 0 0 1 9 0ZM18.75 10.5h.008v.008h-.008V10.5Z"
										/>
									</svg>
								</button>
							</Tooltip>
						{/if}
					</div>

					<div>
						<button
							type="button"
							on:click={() => {
								if (assistantSpeaking) {
									stopAllAudio();
								}
							}}
						>
							<div class=" line-clamp-1 text-sm font-medium">
								{#if loading}
									{$i18n.t('Thinking...')}
								{:else if assistantSpeaking}
									{$i18n.t('Tap to interrupt')}
								{:else}
									{$i18n.t('Listening...')}
								{/if}
							</div>
						</button>
					</div>

					<div>
						<button
							class=" p-3 rounded-full bg-gray-50 dark:bg-gray-900"
							on:click={async () => {
								showCallOverlay.set(false);
							}}
							type="button"
						>
							<svg
								xmlns="http://www.w3.org/2000/svg"
								viewBox="0 0 20 20"
								fill="currentColor"
								class="size-5"
							>
								<path
									d="M6.28 5.22a.75.75 0 0 0-1.06 1.06L8.94 10l-3.72 3.72a.75.75 0 1 0 1.06 1.06L10 11.06l3.72 3.72a.75.75 0 1 0 1.06-1.06L11.06 10l3.72-3.72a.75.75 0 0 0-1.06-1.06L10 8.94 6.28 5.22Z"
								/>
							</svg>
						</button>
					</div>
				</div>
			</div>
		</div>
	</div>
{/if}