package com.inou.clawdnode.calls

import android.content.ComponentName
import android.content.Context
import android.content.Intent
import android.content.ServiceConnection
import android.media.AudioManager
import android.os.Bundle
import android.os.Handler
import android.os.IBinder
import android.os.Looper
import android.speech.RecognitionListener
import android.speech.RecognizerIntent
import android.speech.SpeechRecognizer
import android.speech.tts.TextToSpeech
import android.speech.tts.UtteranceProgressListener
import android.telecom.Call
import android.telecom.InCallService
import android.telecom.VideoProfile
import android.util.Log
import com.inou.clawdnode.ClawdNodeApp
import com.inou.clawdnode.gateway.DirectGateway
import com.inou.clawdnode.protocol.CallAudioEvent
import com.inou.clawdnode.protocol.CallEndedEvent
import com.inou.clawdnode.service.CallManager
import com.inou.clawdnode.service.NodeService
import java.util.*

/**
 * Handles voice interaction with active calls.
 * - Answers calls programmatically
 * - Speaks via TTS into the call
 * - Listens and transcribes caller speech via STT
 * - Routes audio events to Gateway for Claude to respond
 *
 * All mutable state in this class is unsynchronized. The control methods
 * ([answerCall], [rejectCall], [silenceCall], [hangupCall], [speakIntoCall])
 * are presumably invoked on the main thread — TODO confirm against CallManager.
 */
class VoiceCallService : InCallService(), TextToSpeech.OnInitListener {

    private val tag = "VoiceCallService"

    private var tts: TextToSpeech? = null
    private var speechRecognizer: SpeechRecognizer? = null
    private var audioManager: AudioManager? = null

    /** Calls currently known to this service, keyed by call id. */
    private val activeCalls = mutableMapOf<String, Call>()

    /** Wall-clock time (ms) at which each call first became active. */
    private val callStartTimes = mutableMapOf<String, Long>()

    /** Greetings queued by [answerCall], spoken once the call reaches STATE_ACTIVE. */
    private val pendingGreetings = mutableMapOf<String, String>()

    private var currentCallId: String? = null

    /** True while we *want* recognition running (a session may be between restarts). */
    private var isListening = false

    /** Call whose ringing was muted by [silenceCall], and the ringer mode to restore afterwards. */
    private var silencedCallId: String? = null
    private var ringerModeBeforeSilence: Int? = null

    private val mainHandler = Handler(Looper.getMainLooper())

    /** Single shared runnable so a pending delayed restart can be cancelled. */
    private val restartListeningRunnable = Runnable { restartListening() }

    private var nodeService: NodeService? = null

    private val serviceConnection = object : ServiceConnection {
        override fun onServiceConnected(name: ComponentName?, service: IBinder?) {
            // Safe cast: an unexpected binder type leaves nodeService null instead of crashing.
            nodeService = (service as? NodeService.LocalBinder)?.getService()
        }

        override fun onServiceDisconnected(name: ComponentName?) {
            nodeService = null
        }
    }

    /**
     * TTS callbacks are delivered off the main thread, while SpeechRecognizer must only
     * be used from the main thread, so resuming recognition is posted to [mainHandler].
     * Installed once, before any utterance is queued.
     */
    private val utteranceListener = object : UtteranceProgressListener() {
        override fun onStart(utteranceId: String?) {}

        override fun onDone(utteranceId: String?) {
            resumeListeningAfterSpeech()
        }

        override fun onError(utteranceId: String?) {
            resumeListeningAfterSpeech()
        }
    }

    override fun onCreate() {
        super.onCreate()
        Log.i(tag, "VoiceCallService created")
        CallManager.register(this)

        // Initialize TTS
        tts = TextToSpeech(this, this).apply {
            setOnUtteranceProgressListener(utteranceListener)
        }

        // Initialize STT
        if (SpeechRecognizer.isRecognitionAvailable(this)) {
            speechRecognizer = SpeechRecognizer.createSpeechRecognizer(this).apply {
                setRecognitionListener(createRecognitionListener())
            }
        }

        audioManager = getSystemService(Context.AUDIO_SERVICE) as AudioManager

        // Bind to NodeService
        bindService(Intent(this, NodeService::class.java), serviceConnection, Context.BIND_AUTO_CREATE)
    }

    override fun onDestroy() {
        Log.i(tag, "VoiceCallService destroyed")

        // Drop delayed greetings/restarts so they cannot touch released resources.
        mainHandler.removeCallbacksAndMessages(null)
        isListening = false
        silencedCallId?.let { restoreRingerMode(it) }

        tts?.shutdown()
        tts = null
        speechRecognizer?.destroy()
        speechRecognizer = null

        try {
            unbindService(serviceConnection)
        } catch (e: IllegalArgumentException) {
            // Thrown when the connection was never registered (e.g. bind failed).
            Log.w(tag, "NodeService was not bound", e)
        }
        super.onDestroy()
    }

    // ========================================
    // TTS INITIALIZATION
    // ========================================

    override fun onInit(status: Int) {
        if (status == TextToSpeech.SUCCESS) {
            tts?.language = Locale.US
            Log.i(tag, "TTS initialized")
        } else {
            Log.e(tag, "TTS initialization failed")
        }
    }

    // ========================================
    // CALL LIFECYCLE
    // ========================================

    override fun onCallAdded(call: Call) {
        Log.i(tag, "Call added: ${call.details.handle}")

        // The handle URI doubles as the call id; fall back to a timestamp when it is absent.
        val callId = call.details.handle?.toString() ?: System.currentTimeMillis().toString()
        activeCalls[callId] = call

        // Register callback for call state changes
        call.registerCallback(object : Call.Callback() {
            override fun onStateChanged(call: Call, state: Int) {
                handleCallStateChange(callId, call, state)
            }
        })
    }

    override fun onCallRemoved(call: Call) {
        Log.i(tag, "Call removed")

        val callId = activeCalls.entries.find { it.value == call }?.key ?: return

        activeCalls.remove(callId)
        callStartTimes.remove(callId)
        pendingGreetings.remove(callId)
        ActiveCalls.remove(callId)
        restoreRingerMode(callId)

        if (currentCallId == callId) {
            stopListening()
            tts?.stop()
            currentCallId = null
        }
    }

    /** Reacts to a state transition of the call identified by [callId]. */
    private fun handleCallStateChange(callId: String, call: Call, state: Int) {
        Log.d(tag, "Call $callId state: $state")

        when (state) {
            Call.STATE_ACTIVE -> {
                // Record the start time only once so hold/resume does not reset the duration.
                callStartTimes.getOrPut(callId) { System.currentTimeMillis() }
                currentCallId = callId
                restoreRingerMode(callId)
                startListening()

                // Speak a greeting queued by answerCall now that the call is really connected.
                pendingGreetings.remove(callId)?.let { greeting ->
                    mainHandler.postDelayed({ speakIntoCall(callId, greeting) }, GREETING_DELAY_MS)
                }
            }

            Call.STATE_DISCONNECTED -> {
                pendingGreetings.remove(callId)
                restoreRingerMode(callId)

                // Call ended - calculate duration in whole seconds (0 if it never became active)
                val startTime = callStartTimes.remove(callId)
                val duration = if (startTime != null) {
                    ((System.currentTimeMillis() - startTime) / 1000).toInt()
                } else {
                    0
                }

                DirectGateway.sendLog(
                    "call.ended",
                    mapOf(
                        "callId" to callId,
                        "number" to (call.details.handle?.schemeSpecificPart ?: "unknown"),
                        "duration" to duration,
                        "outcome" to "completed"
                    )
                )

                ClawdNodeApp.instance.auditLog.logCall(
                    "CALL_ENDED",
                    call.details.handle?.schemeSpecificPart,
                    null,
                    "completed (${duration}s)"
                )
            }
        }
    }

    // ========================================
    // CALL CONTROL (called by NodeService via CallManager)
    // ========================================

    /**
     * Answers the call identified by [callId] as audio-only.
     *
     * @param greeting optional text spoken into the call once it is active.
     */
    fun answerCall(callId: String, greeting: String?) {
        val call = activeCalls[callId] ?: run {
            Log.w(tag, "Call not found: $callId")
            return
        }

        Log.i(tag, "Answering call: $callId")
        call.answer(VideoProfile.STATE_AUDIO_ONLY)

        ClawdNodeApp.instance.auditLog.logCall(
            "CALL_ANSWERED",
            call.details.handle?.schemeSpecificPart,
            null,
            "ai_answered"
        )

        // Speak greeting after answer
        if (greeting != null) {
            if (currentCallId == callId) {
                // Already the active call: small delay to let audio settle, then speak.
                mainHandler.postDelayed({ speakIntoCall(callId, greeting) }, GREETING_DELAY_MS)
            } else {
                // speakIntoCall refuses calls that are not active yet, so queue the
                // greeting until the STATE_ACTIVE transition arrives.
                pendingGreetings[callId] = greeting
            }
        }
    }

    /** Rejects the ringing call identified by [callId] without a text message. */
    fun rejectCall(callId: String) {
        val call = activeCalls[callId] ?: run {
            Log.w(tag, "Call not found: $callId")
            return
        }

        Log.i(tag, "Rejecting call: $callId")
        call.reject(false, null)

        ClawdNodeApp.instance.auditLog.logCall(
            "CALL_REJECTED",
            call.details.handle?.schemeSpecificPart,
            null,
            "ai_rejected"
        )
    }

    /**
     * Silences the ringer but lets the call continue ringing. The previous ringer
     * mode is restored when the call becomes active, disconnects, or is removed.
     */
    fun silenceCall(callId: String) {
        val manager = audioManager ?: return
        val previousMode = manager.ringerMode

        try {
            manager.ringerMode = AudioManager.RINGER_MODE_SILENT
        } catch (e: SecurityException) {
            // Changing to silent can require notification-policy (Do Not Disturb) access.
            Log.w(tag, "Not allowed to silence call: $callId", e)
            return
        }

        // Keep the earliest saved mode if a call was already silenced.
        if (ringerModeBeforeSilence == null) {
            ringerModeBeforeSilence = previousMode
        }
        silencedCallId = callId
        Log.i(tag, "Silenced call: $callId")
    }

    /** Disconnects the call identified by [callId]. */
    fun hangupCall(callId: String) {
        val call = activeCalls[callId] ?: run {
            Log.w(tag, "Call not found: $callId")
            return
        }

        Log.i(tag, "Hanging up call: $callId")
        call.disconnect()
    }

    /**
     * Speaks [text] on the voice-call audio stream. Ignored unless [callId] is the
     * currently active call. Recognition is paused while speaking and resumed when
     * the utterance finishes, errors, or cannot be queued.
     */
    fun speakIntoCall(callId: String, text: String) {
        if (currentCallId != callId) {
            Log.w(tag, "Not the active call: $callId")
            return
        }

        Log.i(tag, "Speaking: $text")

        // Pause listening while speaking
        stopListening()

        val params = Bundle().apply {
            putInt(TextToSpeech.Engine.KEY_PARAM_STREAM, AudioManager.STREAM_VOICE_CALL)
        }

        val result = tts?.speak(text, TextToSpeech.QUEUE_FLUSH, params, "speak_$callId")
        if (result != TextToSpeech.SUCCESS) {
            // No utterance callback will fire, so resume listening here.
            Log.e(tag, "TTS speak failed for call: $callId")
            startListening()
        }

        ClawdNodeApp.instance.auditLog.log(
            "CALL_SPEAK",
            "TTS: $text",
            mapOf("call_id" to callId)
        )
    }

    /** Puts back the ringer mode saved by [silenceCall], if [callId] is the silenced call. */
    private fun restoreRingerMode(callId: String) {
        if (silencedCallId != callId) return

        val previousMode = ringerModeBeforeSilence
        silencedCallId = null
        ringerModeBeforeSilence = null
        if (previousMode == null) return

        try {
            audioManager?.ringerMode = previousMode
        } catch (e: SecurityException) {
            Log.w(tag, "Not allowed to restore ringer mode", e)
        }
    }

    // ========================================
    // SPEECH RECOGNITION
    // ========================================

    /** Starts a recognition session unless one is already wanted or STT is unavailable. */
    private fun startListening() {
        val recognizer = speechRecognizer ?: return
        if (isListening) return

        Log.d(tag, "Starting speech recognition")
        isListening = true
        mainHandler.removeCallbacks(restartListeningRunnable)

        val intent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH).apply {
            putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM)
            // EXTRA_LANGUAGE is a language-tag String, not a Locale object.
            putExtra(RecognizerIntent.EXTRA_LANGUAGE, Locale.US.toLanguageTag())
            putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, true)
            putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 1)
        }

        recognizer.startListening(intent)
    }

    private fun stopListening() {
        if (!isListening) return

        Log.d(tag, "Stopping speech recognition")
        isListening = false
        mainHandler.removeCallbacks(restartListeningRunnable)
        speechRecognizer?.stopListening()
    }

    /**
     * Begins a fresh recognition session after the previous one ended with a result
     * or an error. Does nothing if listening was stopped or no call is current.
     */
    private fun restartListening() {
        if (!isListening || currentCallId == null) return

        // Clear the flag so startListening's "already listening" guard lets the restart through.
        isListening = false
        speechRecognizer?.cancel()
        startListening()
    }

    /** Resumes recognition on the main thread once TTS output has finished. */
    private fun resumeListeningAfterSpeech() {
        mainHandler.post {
            if (currentCallId != null) startListening()
        }
    }

    private fun createRecognitionListener() = object : RecognitionListener {
        override fun onReadyForSpeech(params: Bundle?) {
            Log.d(tag, "STT ready")
        }

        override fun onBeginningOfSpeech() {
            Log.d(tag, "STT speech started")
        }

        override fun onRmsChanged(rmsdB: Float) {}

        override fun onBufferReceived(buffer: ByteArray?) {}

        override fun onEndOfSpeech() {
            Log.d(tag, "STT speech ended")
        }

        override fun onError(error: Int) {
            Log.e(tag, "STT error: $error")

            // Restart listening on most errors, after a short back-off.
            if (isListening && currentCallId != null) {
                mainHandler.removeCallbacks(restartListeningRunnable)
                mainHandler.postDelayed(restartListeningRunnable, RESTART_DELAY_MS)
            }
        }

        override fun onResults(results: Bundle?) {
            val transcript = results
                ?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION)
                ?.firstOrNull()

            if (transcript != null) {
                Log.i(tag, "STT result: $transcript")

                currentCallId?.let { callId ->
                    DirectGateway.sendLog(
                        "call.audio",
                        mapOf(
                            "callId" to callId,
                            "transcript" to transcript,
                            "isFinal" to true
                        )
                    )
                }
            }

            // Continue listening, even when this session produced no transcript.
            restartListening()
        }

        override fun onPartialResults(partialResults: Bundle?) {
            val transcript = partialResults
                ?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION)
                ?.firstOrNull()
                ?: return

            Log.d(tag, "STT partial: $transcript")

            currentCallId?.let { callId ->
                DirectGateway.sendLog(
                    "call.audio.partial",
                    mapOf(
                        "callId" to callId,
                        "transcript" to transcript,
                        "isFinal" to false
                    )
                )
            }
        }

        override fun onEvent(eventType: Int, params: Bundle?) {}
    }

    private companion object {
        /** Delay before speaking a greeting, giving call audio time to connect. */
        const val GREETING_DELAY_MS = 500L

        /** Back-off before restarting recognition after an STT error. */
        const val RESTART_DELAY_MS = 500L
    }
}