|
@@ -1,6 +1,7 @@
|
|
|
import SwiftUI
|
|
|
import WhisperKit
|
|
|
import AVFoundation
|
|
|
+import Foundation
|
|
|
|
|
|
struct ContentView: View {
|
|
|
@State private var whisperKit: WhisperKit?
|
|
@@ -8,11 +9,11 @@ struct ContentView: View {
|
|
|
@State private var currentText = ""
|
|
|
@State private var bufferSeconds: Double = 0.5 // or whatever the actual buffer size is
|
|
|
@State private var modelState: ModelState = .unloaded
|
|
|
-
|
|
|
+
|
|
|
@AppStorage("selectedModel") private var selectedModel: String = "large"
|
|
|
@AppStorage("selectedLanguage") private var selectedLanguage: String = "english"
|
|
|
@AppStorage("selectedTask") private var selectedTask: String = "transcribe"
|
|
|
-
|
|
|
+
|
|
|
@State private var isRecordingMemo = false
|
|
|
@State private var currentMemo = ""
|
|
|
@State private var lastVoiceActivityTime = Date()
|
|
@@ -20,20 +21,21 @@ struct ContentView: View {
|
|
|
@State private var voiceActivityThreshold: Float = 0.3 // Start with a lower value
|
|
|
@State private var silenceTimeThreshold = 1.0
|
|
|
@State private var debugText = ""
|
|
|
+ @State private var apiEndpoint = "http://192.168.212.74:8000/v1/chat/completions"
|
|
|
|
|
|
var body: some View {
|
|
|
VStack {
|
|
|
Text(currentText)
|
|
|
.padding()
|
|
|
-
|
|
|
+
|
|
|
Text(isListening ? "Listening..." : "Not listening")
|
|
|
.foregroundColor(isListening ? .green : .red)
|
|
|
-
|
|
|
+
|
|
|
if isRecordingMemo {
|
|
|
Text("Recording memo...")
|
|
|
.foregroundColor(.blue)
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
Picker("Model", selection: $selectedModel) {
|
|
|
Text("large").tag("large")
|
|
|
Text("base").tag("base")
|
|
@@ -41,19 +43,19 @@ struct ContentView: View {
|
|
|
}
|
|
|
.pickerStyle(SegmentedPickerStyle())
|
|
|
.padding()
|
|
|
-
|
|
|
+
|
|
|
Button("Load Model") {
|
|
|
loadModel(selectedModel)
|
|
|
}
|
|
|
.disabled(modelState == .loaded)
|
|
|
.padding()
|
|
|
-
|
|
|
+
|
|
|
Text("Model State: \(modelState.description)")
|
|
|
-
|
|
|
+
|
|
|
Text(debugText)
|
|
|
.font(.caption)
|
|
|
.foregroundColor(.gray)
|
|
|
-
|
|
|
+
|
|
|
Slider(value: $voiceActivityThreshold, in: 0.01...1.0) {
|
|
|
Text("Voice Activity Threshold: \(voiceActivityThreshold, specifier: "%.2f")")
|
|
|
}
|
|
@@ -62,7 +64,7 @@ struct ContentView: View {
|
|
|
setupWhisperKit()
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
private func setupWhisperKit() {
|
|
|
Task {
|
|
|
do {
|
|
@@ -74,7 +76,7 @@ struct ContentView: View {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
private func loadModel(_ model: String) {
|
|
|
Task {
|
|
|
let success = try await loadModel(selectedModel)
|
|
@@ -85,13 +87,13 @@ struct ContentView: View {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
private func startListening() {
|
|
|
guard let audioProcessor = whisperKit?.audioProcessor else {
|
|
|
print("AudioProcessor not available")
|
|
|
return
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
do {
|
|
|
try audioProcessor.startRecordingLive { buffer in
|
|
|
DispatchQueue.main.async {
|
|
@@ -103,22 +105,22 @@ struct ContentView: View {
|
|
|
print("Error starting listening: \(error)")
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
private func checkVoiceActivity() {
|
|
|
guard let audioProcessor = whisperKit?.audioProcessor else { return }
|
|
|
-
|
|
|
+
|
|
|
let voiceDetected = AudioProcessor.isVoiceDetected(
|
|
|
in: audioProcessor.relativeEnergy,
|
|
|
nextBufferInSeconds: Float(bufferSeconds),
|
|
|
silenceThreshold: Float(voiceActivityThreshold)
|
|
|
)
|
|
|
-
|
|
|
+
|
|
|
// Debug logging
|
|
|
let energyValuesToConsider = Int(Float(bufferSeconds) / 0.1)
|
|
|
let nextBufferEnergies = audioProcessor.relativeEnergy.suffix(energyValuesToConsider)
|
|
|
let numberOfValuesToCheck = max(10, nextBufferEnergies.count - 10)
|
|
|
let relevantEnergies = Array(nextBufferEnergies.prefix(numberOfValuesToCheck))
|
|
|
-
|
|
|
+
|
|
|
debugText = """
|
|
|
Buffer seconds: \(bufferSeconds)
|
|
|
Energy values to consider: \(energyValuesToConsider)
|
|
@@ -128,7 +130,7 @@ struct ContentView: View {
|
|
|
Max energy: \(relevantEnergies.max() ?? 0)
|
|
|
Voice detected: \(voiceDetected)
|
|
|
"""
|
|
|
-
|
|
|
+
|
|
|
if voiceDetected {
|
|
|
lastVoiceActivityTime = Date()
|
|
|
if !isRecordingMemo {
|
|
@@ -138,16 +140,16 @@ struct ContentView: View {
|
|
|
checkSilence()
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
/// Records how long it has been since voice was last detected and closes the memo after a long pause.
///
/// The elapsed time since `lastVoiceActivityTime` is appended to `debugText`; once it exceeds
/// `silenceTimeThreshold`, `endCurrentMemo()` is called.
private func checkSilence() {
    let elapsed = Date().timeIntervalSince(lastVoiceActivityTime)
    debugText += "\nSilence duration: \(elapsed)"

    // Still within the allowed pause: nothing more to do.
    guard elapsed > silenceTimeThreshold else { return }
    endCurrentMemo()
}
|
|
|
-
|
|
|
+
|
|
|
private func endCurrentMemo() {
|
|
|
if isRecordingMemo {
|
|
|
isRecordingMemo = false
|
|
@@ -164,7 +166,7 @@ struct ContentView: View {
|
|
|
debugText += "\nMemo ended"
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
private func startNewMemo() {
|
|
|
isRecordingMemo = true
|
|
|
currentMemo = ""
|
|
@@ -175,7 +177,7 @@ struct ContentView: View {
|
|
|
transcribeInRealTime()
|
|
|
print("Started new memo")
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
private func transcribeInRealTime() {
|
|
|
Task {
|
|
|
while isRecordingMemo {
|
|
@@ -198,33 +200,73 @@ struct ContentView: View {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
/// Writes `memo` to a timestamped text file in the user's Documents directory, then sends it to the API.
///
/// The file is named `memo_<yyyy-MM-dd_HH-mm-ss>.txt`. If the Documents directory cannot be
/// resolved, the function logs a message and returns without sending anything. A failed write
/// is logged, and the memo is still passed to `sendMemoToAPI(_:)` afterwards.
///
/// - Parameter memo: The memo text to persist and forward.
private func saveMemoToFile(_ memo: String) {
    let dateFormatter = DateFormatter()
    // A fixed `dateFormat` needs a fixed locale: without `en_US_POSIX` the user's calendar
    // and 12/24-hour preference can alter the rendered string and thus the file name.
    dateFormatter.locale = Locale(identifier: "en_US_POSIX")
    dateFormatter.dateFormat = "yyyy-MM-dd_HH-mm-ss"
    let fileName = "memo_\(dateFormatter.string(from: Date())).txt"

    guard let documentsDirectory = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask).first else {
        print("Unable to access documents directory")
        return
    }

    let fileURL = documentsDirectory.appendingPathComponent(fileName)

    do {
        try memo.write(to: fileURL, atomically: true, encoding: .utf8)
        print("Memo saved to: \(fileURL.path)")
    } catch {
        print("Error saving memo: \(error)")
    }

    // After the save attempt, send the memo over HTTP as well.
    sendMemoToAPI(memo)
}
|
|
|
+
|
|
|
/// Posts `memo` as a single user message to the chat-completions URL stored in `apiEndpoint`.
///
/// The request is fire-and-forget: the outcome is only logged with `print`; nothing is
/// returned to the caller and no view state is modified.
///
/// - Parameter memo: Text sent as the `content` of the user message.
private func sendMemoToAPI(_ memo: String) {
    guard let url = URL(string: apiEndpoint) else {
        print("Invalid API endpoint URL")
        return
    }

    let payload: [String: Any] = [
        "model": "llama-3.1-8b",
        "messages": [["role": "user", "content": memo]],
        "temperature": 0.7
    ]

    let jsonData: Data
    do {
        jsonData = try JSONSerialization.data(withJSONObject: payload)
    } catch {
        // Keep the underlying error in the log instead of discarding it with `try?`.
        print("Failed to serialize JSON payload: \(error)")
        return
    }

    var request = URLRequest(url: url)
    request.httpMethod = "POST"
    request.setValue("application/json", forHTTPHeaderField: "Content-Type")
    request.httpBody = jsonData

    URLSession.shared.dataTask(with: request) { data, response, error in
        if let error = error {
            print("Error sending memo to API: \(error)")
            return
        }

        if let httpResponse = response as? HTTPURLResponse {
            print("API response status code: \(httpResponse.statusCode)")
            // A completed transfer is not necessarily a successful request; flag non-2xx explicitly.
            if !(200...299).contains(httpResponse.statusCode) {
                print("API request failed with HTTP status \(httpResponse.statusCode)")
            }
        }

        // The body is logged for failures too, since it may carry the server's error detail.
        if let data = data, let responseString = String(data: data, encoding: .utf8) {
            print("API response: \(responseString)")
        }
    }.resume()
}
|
|
|
-
|
|
|
+
|
|
|
private func loadModel(_ model: String) async throws -> Bool {
|
|
|
guard let whisperKit = whisperKit else {
|
|
|
print("WhisperKit instance not initialized")
|
|
|
return false
|
|
|
}
|
|
|
-
|
|
|
modelState = .loading
|
|
|
do {
|
|
|
print("Starting to load model: \(model)")
|
|
@@ -240,4 +282,4 @@ struct ContentView: View {
|
|
|
return false
|
|
|
}
|
|
|
}
|
|
|
-}
|
|
|
+}
|