351 lines
12 KiB
Kotlin
351 lines
12 KiB
Kotlin
package com.inou.clawdnode.calls
|
|
|
|
import android.content.ComponentName
|
|
import android.content.Context
|
|
import android.content.Intent
|
|
import android.content.ServiceConnection
|
|
import android.media.AudioManager
|
|
import android.os.Bundle
|
|
import android.os.IBinder
|
|
import android.speech.RecognitionListener
|
|
import android.speech.RecognizerIntent
|
|
import android.speech.SpeechRecognizer
|
|
import android.speech.tts.TextToSpeech
|
|
import android.speech.tts.UtteranceProgressListener
|
|
import android.telecom.Call
|
|
import com.inou.clawdnode.gateway.DirectGateway
|
|
import android.telecom.InCallService
|
|
import android.telecom.VideoProfile
|
|
import android.util.Log
|
|
import com.inou.clawdnode.ClawdNodeApp
|
|
import com.inou.clawdnode.protocol.CallAudioEvent
|
|
import com.inou.clawdnode.protocol.CallEndedEvent
|
|
import com.inou.clawdnode.service.CallManager
|
|
import com.inou.clawdnode.service.NodeService
|
|
import java.util.*
|
|
|
|
/**
 * In-call service that handles voice interaction with active calls.
 *
 * - Answers, rejects, silences and hangs up calls on request.
 * - Speaks text into the call via [TextToSpeech] on the voice-call stream.
 * - Transcribes speech via [SpeechRecognizer] and forwards final and partial
 *   transcripts to [DirectGateway] so Claude can respond.
 * - Reports call end (with duration) to [DirectGateway] and the audit log.
 *
 * Recognition restarts and post-speech resumes are funnelled through a
 * main-thread handler because [SpeechRecognizer] must only be driven from the
 * main thread, while TTS progress callbacks may arrive on other threads.
 */
class VoiceCallService : InCallService(), TextToSpeech.OnInitListener {

    private val tag = "VoiceCallService"

    private var tts: TextToSpeech? = null
    private var speechRecognizer: SpeechRecognizer? = null
    private var audioManager: AudioManager? = null

    /** Calls currently known to this service, keyed by the id assigned in [onCallAdded]. */
    private val activeCalls = mutableMapOf<String, Call>()

    /** Wall-clock time (ms) at which each call first reached [Call.STATE_ACTIVE]. */
    private val callStartTimes = mutableMapOf<String, Long>()

    /** Id of the call that is currently active, or null when no call is active. */
    private var currentCallId: String? = null

    /** True while recognition is wanted; cleared by [stopListening] to suppress restarts. */
    private var isListening = false

    /** Handler bound to the main looper; used for delayed work and thread hand-off. */
    private val mainHandler by lazy { android.os.Handler(mainLooper) }

    /** Delayed restart scheduled after a recognizer error; cancellable via the handler. */
    private val restartListeningTask = Runnable { restartListening() }

    private var nodeService: NodeService? = null
    private val serviceConnection = object : ServiceConnection {
        override fun onServiceConnected(name: ComponentName?, service: IBinder?) {
            // Safe cast: a null or unexpected binder leaves nodeService unset instead of crashing.
            nodeService = (service as? NodeService.LocalBinder)?.getService()
        }

        override fun onServiceDisconnected(name: ComponentName?) {
            nodeService = null
        }
    }

    override fun onCreate() {
        super.onCreate()
        Log.i(tag, "VoiceCallService created")

        CallManager.register(this)

        // Initialize TTS; onInit() is invoked once the engine is ready.
        tts = TextToSpeech(this, this)

        // Initialize STT only when a recognition service exists on the device.
        if (SpeechRecognizer.isRecognitionAvailable(this)) {
            speechRecognizer = SpeechRecognizer.createSpeechRecognizer(this).apply {
                setRecognitionListener(createRecognitionListener())
            }
        }

        audioManager = getSystemService(Context.AUDIO_SERVICE) as AudioManager

        // Bind to NodeService
        bindService(Intent(this, NodeService::class.java), serviceConnection, Context.BIND_AUTO_CREATE)
    }

    override fun onDestroy() {
        Log.i(tag, "VoiceCallService destroyed")

        // Drop pending greetings/restarts so nothing touches the released engines below.
        mainHandler.removeCallbacksAndMessages(null)
        isListening = false

        tts?.shutdown()
        tts = null
        speechRecognizer?.destroy()
        speechRecognizer = null

        unbindService(serviceConnection)
        super.onDestroy()
    }

    // ========================================
    // TTS INITIALIZATION
    // ========================================

    /** Callback from [TextToSpeech]; selects US English once the engine is ready. */
    override fun onInit(status: Int) {
        if (status == TextToSpeech.SUCCESS) {
            tts?.language = Locale.US
            Log.i(tag, "TTS initialized")
        } else {
            Log.e(tag, "TTS initialization failed")
        }
    }

    // ========================================
    // CALL LIFECYCLE
    // ========================================

    /**
     * Tracks a newly added call and subscribes to its state changes.
     *
     * The call id is the string form of the call handle, or the current time in
     * milliseconds when the call has no handle.
     */
    override fun onCallAdded(call: Call) {
        Log.i(tag, "Call added: ${call.details.handle}")

        val callId = call.details.handle?.toString() ?: System.currentTimeMillis().toString()
        activeCalls[callId] = call

        // Register callback for call state changes
        call.registerCallback(object : Call.Callback() {
            override fun onStateChanged(call: Call, state: Int) {
                handleCallStateChange(callId, call, state)
            }
        })
    }

    /** Forgets a removed call and stops recognition if it was the active one. */
    override fun onCallRemoved(call: Call) {
        Log.i(tag, "Call removed")

        val callId = activeCalls.entries.find { it.value == call }?.key ?: return

        activeCalls.remove(callId)
        ActiveCalls.remove(callId)
        // Normally already consumed on STATE_DISCONNECTED; removed here so no entry can linger.
        callStartTimes.remove(callId)

        if (currentCallId == callId) {
            stopListening()
            currentCallId = null
        }
    }

    /**
     * Reacts to a call state transition.
     *
     * On [Call.STATE_ACTIVE] the call becomes the current call and recognition
     * starts. On [Call.STATE_DISCONNECTED] the duration in whole seconds is
     * reported to the gateway and the audit log (0 when the call never became active).
     */
    private fun handleCallStateChange(callId: String, call: Call, state: Int) {
        Log.d(tag, "Call $callId state: $state")

        when (state) {
            Call.STATE_ACTIVE -> {
                // Keep the first activation time: STATE_ACTIVE fires again after hold/resume
                // and must not reset the measured duration.
                callStartTimes.getOrPut(callId) { System.currentTimeMillis() }
                currentCallId = callId
                startListening()
            }
            Call.STATE_DISCONNECTED -> {
                val duration = callStartTimes.remove(callId)
                    ?.let { startTime -> ((System.currentTimeMillis() - startTime) / 1000).toInt() }
                    ?: 0
                val number = call.details.handle?.schemeSpecificPart

                DirectGateway.sendLog("call.ended", mapOf(
                    "callId" to callId,
                    "number" to (number ?: "unknown"),
                    "duration" to duration,
                    "outcome" to "completed"
                ))

                ClawdNodeApp.instance.auditLog.logCall(
                    "CALL_ENDED",
                    number,
                    null,
                    "completed (${duration}s)"
                )
            }
        }
    }

    // ========================================
    // CALL CONTROL (called by NodeService via CallManager)
    // ========================================

    /**
     * Answers the call as audio-only and optionally speaks a greeting.
     *
     * @param callId id of a call previously seen by [onCallAdded]; unknown ids are logged and ignored
     * @param greeting text to speak shortly after answering, or null for none
     */
    fun answerCall(callId: String, greeting: String?) {
        val call = activeCalls[callId] ?: run {
            Log.w(tag, "Call not found: $callId")
            return
        }

        Log.i(tag, "Answering call: $callId")
        call.answer(VideoProfile.STATE_AUDIO_ONLY)

        ClawdNodeApp.instance.auditLog.logCall(
            "CALL_ANSWERED",
            call.details.handle?.schemeSpecificPart,
            null,
            "ai_answered"
        )

        // Speak greeting after answer, with a small delay to let the call connect.
        if (greeting != null) {
            mainHandler.postDelayed({ speakIntoCall(callId, greeting) }, GREETING_DELAY_MS)
        }
    }

    /** Rejects the call without a text message; unknown ids are logged and ignored. */
    fun rejectCall(callId: String) {
        val call = activeCalls[callId] ?: run {
            Log.w(tag, "Call not found: $callId")
            return
        }
        Log.i(tag, "Rejecting call: $callId")
        call.reject(false, null)

        ClawdNodeApp.instance.auditLog.logCall(
            "CALL_REJECTED",
            call.details.handle?.schemeSpecificPart,
            null,
            "ai_rejected"
        )
    }

    /**
     * Silences the ringer while letting the call keep ringing.
     *
     * This switches the device ringer mode to silent. The platform may refuse the
     * change (SecurityException) when the app lacks Do Not Disturb access; that is
     * logged rather than propagated.
     */
    fun silenceCall(callId: String) {
        try {
            audioManager?.ringerMode = AudioManager.RINGER_MODE_SILENT
            Log.i(tag, "Silenced call: $callId")
        } catch (e: SecurityException) {
            Log.w(tag, "Not allowed to silence ringer for call: $callId", e)
        }
    }

    /** Disconnects the call; unknown ids are logged and ignored. */
    fun hangupCall(callId: String) {
        val call = activeCalls[callId] ?: run {
            Log.w(tag, "Call not found: $callId")
            return
        }
        Log.i(tag, "Hanging up call: $callId")
        call.disconnect()
    }

    /**
     * Speaks [text] into the active call via TTS on the voice-call stream.
     *
     * Recognition is paused while speaking and resumed when the utterance
     * finishes, fails, or could not be queued at all. Nothing is spoken when
     * [callId] is not the currently active call.
     */
    fun speakIntoCall(callId: String, text: String) {
        if (currentCallId != callId) {
            Log.w(tag, "Not the active call: $callId")
            return
        }

        Log.i(tag, "Speaking: $text")

        // Pause listening while speaking
        stopListening()

        val params = Bundle().apply {
            putInt(TextToSpeech.Engine.KEY_PARAM_STREAM, AudioManager.STREAM_VOICE_CALL)
        }

        // Install the listener before speaking so no progress callback can be missed.
        tts?.setOnUtteranceProgressListener(object : UtteranceProgressListener() {
            override fun onStart(utteranceId: String?) {}

            override fun onDone(utteranceId: String?) {
                // Resume listening after speaking
                resumeListeningAfterSpeech(callId)
            }

            override fun onError(utteranceId: String?) {
                resumeListeningAfterSpeech(callId)
            }
        })

        val result = tts?.speak(text, TextToSpeech.QUEUE_FLUSH, params, "speak_$callId")
        if (result != TextToSpeech.SUCCESS) {
            // No utterance was queued (engine missing or not ready), so no callback
            // will arrive to resume recognition; resume it here.
            Log.e(tag, "TTS speak failed for call: $callId")
            resumeListeningAfterSpeech(callId)
        }

        ClawdNodeApp.instance.auditLog.log(
            "CALL_SPEAK",
            "TTS: $text",
            mapOf("call_id" to callId)
        )
    }

    /**
     * Resumes recognition on the main thread, provided [callId] is still the
     * active call. TTS callbacks may run off the main thread, hence the post.
     */
    private fun resumeListeningAfterSpeech(callId: String) {
        mainHandler.post {
            if (currentCallId == callId) startListening()
        }
    }

    // ========================================
    // SPEECH RECOGNITION
    // ========================================

    /** Starts a recognition session unless one is already wanted or STT is unavailable. */
    private fun startListening() {
        val recognizer = speechRecognizer ?: return
        if (isListening) return

        // A fresh session supersedes any restart still waiting after an earlier error.
        mainHandler.removeCallbacks(restartListeningTask)

        Log.d(tag, "Starting speech recognition")
        isListening = true

        val intent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH).apply {
            putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM)
            // EXTRA_LANGUAGE takes a BCP 47 tag string ("en-US"), not a Locale object.
            putExtra(RecognizerIntent.EXTRA_LANGUAGE, Locale.US.toLanguageTag())
            putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, true)
            putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 1)
        }

        recognizer.startListening(intent)
    }

    /** Stops recognition and marks it as unwanted so listener callbacks do not restart it. */
    private fun stopListening() {
        if (!isListening) return

        Log.d(tag, "Stopping speech recognition")
        isListening = false
        mainHandler.removeCallbacks(restartListeningTask)
        speechRecognizer?.stopListening()
    }

    /**
     * Begins a new recognition session after the previous one ended (result or
     * error), as long as listening is still wanted and a call is active.
     */
    private fun restartListening() {
        if (!isListening || currentCallId == null) return
        // The previous session is over; clear the flag so startListening() proceeds.
        isListening = false
        startListening()
    }

    /** Builds the listener that forwards transcripts and keeps recognition running. */
    private fun createRecognitionListener() = object : RecognitionListener {
        override fun onReadyForSpeech(params: Bundle?) {
            Log.d(tag, "STT ready")
        }

        override fun onBeginningOfSpeech() {
            Log.d(tag, "STT speech started")
        }

        override fun onRmsChanged(rmsdB: Float) {}

        override fun onBufferReceived(buffer: ByteArray?) {}

        override fun onEndOfSpeech() {
            Log.d(tag, "STT speech ended")
        }

        override fun onError(error: Int) {
            Log.e(tag, "STT error: $error")
            if (!isListening || currentCallId == null) return

            if (error == SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS) {
                // Retrying cannot succeed without the permission; stop instead of looping.
                isListening = false
                return
            }

            // Restart listening on the remaining errors, after a short pause.
            mainHandler.removeCallbacks(restartListeningTask)
            mainHandler.postDelayed(restartListeningTask, RESTART_DELAY_MS)
        }

        override fun onResults(results: Bundle?) {
            val transcript = results
                ?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION)
                ?.firstOrNull()
                ?: return

            Log.i(tag, "STT result: $transcript")

            currentCallId?.let { callId ->
                DirectGateway.sendLog("call.audio", mapOf(
                    "callId" to callId,
                    "transcript" to transcript,
                    "isFinal" to true
                ))
            }

            // Continue listening
            restartListening()
        }

        override fun onPartialResults(partialResults: Bundle?) {
            val transcript = partialResults
                ?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION)
                ?.firstOrNull()
                ?: return

            Log.d(tag, "STT partial: $transcript")

            currentCallId?.let { callId ->
                DirectGateway.sendLog("call.audio.partial", mapOf(
                    "callId" to callId,
                    "transcript" to transcript,
                    "isFinal" to false
                ))
            }
        }

        override fun onEvent(eventType: Int, params: Bundle?) {}
    }

    private companion object {
        /** Delay before speaking the greeting, giving the call time to connect. */
        const val GREETING_DELAY_MS = 500L

        /** Pause before restarting recognition after a recognizer error. */
        const val RESTART_DELAY_MS = 500L
    }
}
|