8 月之前 · c94ffa0e2b
--- a/examples/astra/astra/ContentView.swift
+++ b/examples/astra/astra/ContentView.swift
@@ -10,7 +10,7 @@ struct ContentView: View {
 
				     @State private var bufferSeconds: Double = 0.5 // or whatever the actual buffer size is
			
 
				     @State private var modelState: ModelState = .unloaded
			
 
				 
			
 
				-    @AppStorage("selectedModel") private var selectedModel: String = "large"
			
 
				+    @AppStorage("selectedModel") private var selectedModel: String = "large-v3"
			
 
				     @AppStorage("selectedLanguage") private var selectedLanguage: String = "english"
			
 
				     @AppStorage("selectedTask") private var selectedTask: String = "transcribe"
			
 
				 
			
@@ -18,10 +18,13 @@ struct ContentView: View {
 
				     @State private var currentMemo = ""
			
 
				     @State private var lastVoiceActivityTime = Date()
			
 
				     @State private var silenceTimer: Timer?
			
 
				-    @State private var voiceActivityThreshold: Float = 0.3 // Start with a lower value
			
 
				+    @State private var voiceActivityThreshold: Float = 0.1 // Lower this value
			
 
				     @State private var silenceTimeThreshold = 1.0
			
 
				     @State private var debugText = ""
			
 
				     @State private var apiEndpoint = "http://192.168.212.74:8000/v1/chat/completions"
			
 
				+    @State private var audioBuffer: [Float] = []
			
 
				+    @State private var bufferDuration: Double = 0.5 // 0.5 seconds buffer
			
 
				+    @State private var isInitialTranscription = true
			
 
				 
			
 
				     var body: some View {
			
 
				         VStack {
			
@@ -37,7 +40,7 @@ struct ContentView: View {
 
				             }
			
 
				 
			
 
				             Picker("Model", selection: $selectedModel) {
			
 
				-                Text("large").tag("large")
			
 
				+                Text("large-v3").tag("large-v3")
			
 
				                 Text("base").tag("base")
			
 
				                 Text("small").tag("small")
			
 
				             }
			
@@ -71,12 +74,26 @@ struct ContentView: View {
 
				                 whisperKit = try await WhisperKit(verbose: true)
			
 
				                 print("WhisperKit initialized successfully")
			
 
				                 startListening()
			
 
				+                startAudioBuffering() // Add this line
			
 
				             } catch {
			
 
				                 print("Error initializing WhisperKit: \(error)")
			
 
				             }
			
 
				         }
			
 
				     }
			
 
				 
			
 
				+    // Add this new function
			
 
				+    private func startAudioBuffering() {
			
 
				+        Task {
			
 
				+            while true {
			
 
				+                if let samples = whisperKit?.audioProcessor.audioSamples {
			
 
				+                    let bufferSize = Int(Double(WhisperKit.sampleRate) * bufferDuration)
			
 
				+                    audioBuffer = Array(samples.suffix(bufferSize))
			
 
				+                }
			
 
				+                try await Task.sleep(nanoseconds: 100_000_000) // Update every 0.1 seconds
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				     private func loadModel(_ model: String) {
			
 
				         Task {
			
 
				             let success = try await loadModel(selectedModel)
			
@@ -170,6 +187,7 @@ struct ContentView: View {
 
				     private func startNewMemo() {
			
 
				         isRecordingMemo = true
			
 
				         currentMemo = ""
			
 
				+        isInitialTranscription = true
			
 
				         silenceTimer?.invalidate()
			
 
				         silenceTimer = Timer.scheduledTimer(withTimeInterval: 0.5, repeats: true) { _ in
			
 
				             checkSilence()
			
@@ -183,15 +201,22 @@ struct ContentView: View {
 
				             while isRecordingMemo {
			
 
				                 if let samples = whisperKit?.audioProcessor.audioSamples, samples.count > WhisperKit.sampleRate {
			
 
				                     do {
			
 
				-                        let result = try await whisperKit?.transcribe(audioArray: Array(samples))
			
 
				+                        let samplesToTranscribe: [Float]
			
 
				+                        if isInitialTranscription {
			
 
				+                            samplesToTranscribe = audioBuffer + samples
			
 
				+                            isInitialTranscription = false
			
 
				+                        } else {
			
 
				+                            samplesToTranscribe = Array(samples)
			
 
				+                        }
			
 
				+                        
			
 
				+                        let result = try await whisperKit?.transcribe(audioArray: samplesToTranscribe)
			
 
				                         await MainActor.run {
			
 
				                             let newText = result?.first?.text ?? ""
			
 
				                             if !newText.isEmpty {
			
 
				-                                currentMemo += newText
			
 
				-                                currentText += newText
			
 
				+                                currentMemo = newText
			
 
				+                                currentText = newText
			
 
				                             }
			
 
				                         }
			
 
				-                        whisperKit?.audioProcessor.purgeAudioSamples(keepingLast: 0)
			
 
				                     } catch {
			
 
				                         print("Transcription error: \(error)")
			
 
				                     }
			
@@ -233,7 +258,8 @@ struct ContentView: View {
 
				         let payload: [String: Any] = [
			
 
				             "model": "llama-3.1-8b",
			
 
				             "messages": [["role": "user", "content": memo]],
			
 
				-            "temperature": 0.7
			
 
				+            "temperature": 0.7,
			
 
				+            "stream": true
			
 
				         ]
			
 
				 
			
 
				         guard let jsonData = try? JSONSerialization.data(withJSONObject: payload) else {