clawdnode-android/app/src/main/java/com/inou/clawdnode/calls/VoiceCallService.kt

351 lines
12 KiB
Kotlin

package com.inou.clawdnode.calls
import android.content.ComponentName
import android.content.Context
import android.content.Intent
import android.content.ServiceConnection
import android.media.AudioManager
import android.os.Bundle
import android.os.IBinder
import android.speech.RecognitionListener
import android.speech.RecognizerIntent
import android.speech.SpeechRecognizer
import android.speech.tts.TextToSpeech
import android.speech.tts.UtteranceProgressListener
import android.telecom.Call
import com.inou.clawdnode.gateway.DirectGateway
import android.telecom.InCallService
import android.telecom.VideoProfile
import android.util.Log
import com.inou.clawdnode.ClawdNodeApp
import com.inou.clawdnode.protocol.CallAudioEvent
import com.inou.clawdnode.protocol.CallEndedEvent
import com.inou.clawdnode.service.CallManager
import com.inou.clawdnode.service.NodeService
import java.util.*
/**
 * Handles voice interaction with active calls.
 * - Answers, rejects, silences and hangs up calls programmatically
 * - Speaks via TTS into the call
 * - Listens and transcribes caller speech via STT
 * - Routes transcripts and call-ended events to the gateway via [DirectGateway.sendLog]
 *
 * Threading: [SpeechRecognizer] must only be used from the main thread, while
 * [UtteranceProgressListener] callbacks may arrive on other threads. Every
 * recognizer restart triggered by TTS completion is therefore posted to [mainHandler].
 *
 * NOTE(review): the service registers itself with [CallManager] in [onCreate]; no matching
 * unregister call is visible in this file — confirm whether CallManager needs one.
 */
class VoiceCallService : InCallService(), TextToSpeech.OnInitListener {

    private val tag = "VoiceCallService"

    private var tts: TextToSpeech? = null
    private var speechRecognizer: SpeechRecognizer? = null
    private var audioManager: AudioManager? = null

    /** Calls currently known to this service, keyed by the id computed in [onCallAdded]. */
    private val activeCalls = mutableMapOf<String, Call>()

    /** State callbacks registered per call id, kept so they can be unregistered again. */
    private val callCallbacks = mutableMapOf<String, Call.Callback>()

    /** Wall-clock time (ms) at which each call last entered [Call.STATE_ACTIVE]. */
    private val callStartTimes = mutableMapOf<String, Long>()

    /** Id of the call that is currently active (the one TTS/STT operate on), if any. */
    private var currentCallId: String? = null

    /**
     * True while continuous recognition is wanted. It stays true between recognition
     * sessions and is only cleared by [stopListening] (or a terminal recognizer error).
     */
    private var isListening = false

    /** Single main-thread handler for delayed work; cleared in [onDestroy]. */
    private val mainHandler = android.os.Handler(android.os.Looper.getMainLooper())

    private var nodeService: NodeService? = null

    private val serviceConnection = object : ServiceConnection {
        override fun onServiceConnected(name: ComponentName?, service: IBinder?) {
            // Safe cast: an unexpected binder is logged instead of crashing the in-call service.
            val bound = (service as? NodeService.LocalBinder)?.getService()
            if (bound == null) {
                Log.w(tag, "Unexpected binder from NodeService: $service")
            }
            nodeService = bound
        }

        override fun onServiceDisconnected(name: ComponentName?) {
            nodeService = null
        }
    }

    /**
     * Resumes speech recognition once TTS has finished (or failed) speaking.
     * Installed once in [onCreate], before any utterance is queued, so no callback is missed.
     */
    private val utteranceListener = object : UtteranceProgressListener() {
        override fun onStart(utteranceId: String?) {}

        override fun onDone(utteranceId: String?) {
            resumeListeningOnMainThread()
        }

        override fun onError(utteranceId: String?) {
            resumeListeningOnMainThread()
        }
    }

    override fun onCreate() {
        super.onCreate()
        Log.i(tag, "VoiceCallService created")
        CallManager.register(this)

        // Initialize TTS; onInit() is invoked when the engine is ready.
        tts = TextToSpeech(this, this).also { engine ->
            engine.setOnUtteranceProgressListener(utteranceListener)
        }

        // Initialize STT only when a recognition service exists on this device.
        if (SpeechRecognizer.isRecognitionAvailable(this)) {
            speechRecognizer = SpeechRecognizer.createSpeechRecognizer(this).also { recognizer ->
                recognizer.setRecognitionListener(createRecognitionListener())
            }
        }

        audioManager = getSystemService(Context.AUDIO_SERVICE) as AudioManager

        // Bind to NodeService
        bindService(Intent(this, NodeService::class.java), serviceConnection, Context.BIND_AUTO_CREATE)
    }

    override fun onDestroy() {
        Log.i(tag, "VoiceCallService destroyed")

        // Drop pending greetings / recognizer restarts so nothing runs against destroyed engines.
        mainHandler.removeCallbacksAndMessages(null)
        isListening = false

        callCallbacks.forEach { (callId, callback) ->
            activeCalls[callId]?.unregisterCallback(callback)
        }
        callCallbacks.clear()

        tts?.shutdown()
        tts = null
        speechRecognizer?.destroy()
        speechRecognizer = null

        unbindService(serviceConnection)
        super.onDestroy()
    }

    // ========================================
    // TTS INITIALIZATION
    // ========================================

    /** [TextToSpeech.OnInitListener] callback: selects US English once the engine is ready. */
    override fun onInit(status: Int) {
        if (status != TextToSpeech.SUCCESS) {
            Log.e(tag, "TTS initialization failed")
            return
        }
        val languageResult = tts?.setLanguage(Locale.US)
        if (languageResult == TextToSpeech.LANG_MISSING_DATA ||
            languageResult == TextToSpeech.LANG_NOT_SUPPORTED
        ) {
            Log.w(tag, "TTS language ${Locale.US} unavailable (result=$languageResult)")
        }
        Log.i(tag, "TTS initialized")
    }

    // ========================================
    // CALL LIFECYCLE
    // ========================================

    /**
     * Tracks a new call and subscribes to its state changes.
     * The call id is the handle URI string, or the current time in ms when there is no handle.
     */
    override fun onCallAdded(call: Call) {
        val handle = call.details.handle
        Log.i(tag, "Call added: $handle")

        val callId = handle?.toString() ?: System.currentTimeMillis().toString()
        val stateCallback = object : Call.Callback() {
            override fun onStateChanged(call: Call, state: Int) {
                handleCallStateChange(callId, call, state)
            }
        }

        activeCalls[callId] = call
        callCallbacks[callId] = stateCallback
        call.registerCallback(stateCallback)
    }

    /** Forgets a removed call and stops recognition if it was the active one. */
    override fun onCallRemoved(call: Call) {
        Log.i(tag, "Call removed")
        val callId = activeCalls.entries.firstOrNull { it.value == call }?.key ?: return

        activeCalls.remove(callId)
        callCallbacks.remove(callId)?.let { callback -> call.unregisterCallback(callback) }
        ActiveCalls.remove(callId)

        if (currentCallId == callId) {
            stopListening()
            currentCallId = null
        }
    }

    /**
     * Reacts to call state transitions:
     * - ACTIVE: records the start time, marks the call current and starts recognition.
     * - DISCONNECTED: reports the call duration to the gateway and the audit log.
     */
    private fun handleCallStateChange(callId: String, call: Call, state: Int) {
        Log.d(tag, "Call $callId state: $state")
        when (state) {
            Call.STATE_ACTIVE -> {
                callStartTimes[callId] = System.currentTimeMillis()
                currentCallId = callId
                startListening()
            }
            Call.STATE_DISCONNECTED -> {
                // Duration in whole seconds; 0 when the call never became active.
                val duration = callStartTimes.remove(callId)
                    ?.let { startedAt -> ((System.currentTimeMillis() - startedAt) / 1000).toInt() }
                    ?: 0
                val number = call.details.handle?.schemeSpecificPart

                DirectGateway.sendLog("call.ended", mapOf(
                    "callId" to callId,
                    "number" to (number ?: "unknown"),
                    "duration" to duration,
                    "outcome" to "completed"
                ))
                ClawdNodeApp.instance.auditLog.logCall(
                    "CALL_ENDED",
                    number,
                    null,
                    "completed (${duration}s)"
                )
            }
        }
    }

    // ========================================
    // CALL CONTROL (called by NodeService via CallManager)
    // ========================================

    /**
     * Answers [callId] as an audio-only call and optionally speaks [greeting].
     *
     * The greeting is spoken after [GREETING_DELAY_MS]. NOTE: [speakIntoCall] only speaks into
     * the current call, which is set on STATE_ACTIVE, so a greeting is dropped (with a warning)
     * if the call has not become active by then.
     */
    fun answerCall(callId: String, greeting: String?) {
        val call = activeCalls[callId] ?: run {
            Log.w(tag, "Call not found: $callId")
            return
        }

        Log.i(tag, "Answering call: $callId")
        call.answer(VideoProfile.STATE_AUDIO_ONLY)
        ClawdNodeApp.instance.auditLog.logCall(
            "CALL_ANSWERED",
            call.details.handle?.schemeSpecificPart,
            null,
            "ai_answered"
        )

        if (greeting != null) {
            // Small delay to let the call connect before speaking.
            mainHandler.postDelayed({ speakIntoCall(callId, greeting) }, GREETING_DELAY_MS)
        }
    }

    /** Rejects [callId] without a text message; no-op if the call is unknown. */
    fun rejectCall(callId: String) {
        val call = activeCalls[callId] ?: return
        Log.i(tag, "Rejecting call: $callId")
        call.reject(false, null)
        ClawdNodeApp.instance.auditLog.logCall(
            "CALL_REJECTED",
            call.details.handle?.schemeSpecificPart,
            null,
            "ai_rejected"
        )
    }

    /**
     * Silences the ringer but lets the call keep ringing.
     * The ringer mode is device-wide and is not restored by this class.
     */
    fun silenceCall(callId: String) {
        try {
            audioManager?.ringerMode = AudioManager.RINGER_MODE_SILENT
            Log.i(tag, "Silenced call: $callId")
        } catch (e: SecurityException) {
            // Thrown when the change would toggle Do Not Disturb and access was not granted.
            Log.w(tag, "Not allowed to silence ringer for call: $callId", e)
        }
    }

    /** Disconnects [callId]; no-op if the call is unknown. */
    fun hangupCall(callId: String) {
        val call = activeCalls[callId] ?: return
        Log.i(tag, "Hanging up call: $callId")
        call.disconnect()
    }

    /**
     * Speaks [text] on the voice-call stream if [callId] is the current call.
     * Recognition is paused while speaking and resumed by [utteranceListener],
     * or immediately if the utterance could not be queued.
     */
    fun speakIntoCall(callId: String, text: String) {
        if (currentCallId != callId) {
            Log.w(tag, "Not the active call: $callId")
            return
        }

        Log.i(tag, "Speaking: $text")
        // Pause listening while speaking so the recognizer does not transcribe our own voice.
        stopListening()

        val params = Bundle().apply {
            putInt(TextToSpeech.Engine.KEY_PARAM_STREAM, AudioManager.STREAM_VOICE_CALL)
        }
        val queued = tts?.speak(text, TextToSpeech.QUEUE_FLUSH, params, "speak_$callId")
        if (queued != TextToSpeech.SUCCESS) {
            // No utterance callback will fire, so resume recognition ourselves.
            Log.w(tag, "TTS speak failed (result=$queued)")
            resumeListeningOnMainThread()
        }

        ClawdNodeApp.instance.auditLog.log(
            "CALL_SPEAK",
            "TTS: $text",
            mapOf("call_id" to callId)
        )
    }

    // ========================================
    // SPEECH RECOGNITION
    // ========================================

    /** Starts a recognition session unless one is already wanted or STT is unavailable. */
    private fun startListening() {
        val recognizer = speechRecognizer ?: return
        if (isListening) return

        Log.d(tag, "Starting speech recognition")
        isListening = true
        val intent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH).apply {
            putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM)
            // EXTRA_LANGUAGE expects an IETF language tag string, not a Locale object.
            putExtra(RecognizerIntent.EXTRA_LANGUAGE, Locale.US.toLanguageTag())
            putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, true)
            putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 1)
        }
        recognizer.startListening(intent)
    }

    /** Stops recognition and marks listening as no longer wanted. */
    private fun stopListening() {
        if (!isListening) return
        Log.d(tag, "Stopping speech recognition")
        isListening = false
        speechRecognizer?.stopListening()
    }

    /**
     * Begins a fresh recognition session after the previous one ended (result or error),
     * provided listening is still wanted and a call is active.
     */
    private fun restartListening() {
        if (!isListening || currentCallId == null) return
        // The previous session is over; clear the flag so startListening() does not bail out.
        isListening = false
        startListening()
    }

    /** Resumes recognition on the main thread if a call is still current. */
    private fun resumeListeningOnMainThread() {
        mainHandler.post {
            if (currentCallId != null) startListening()
        }
    }

    /** Builds the listener that forwards transcripts to the gateway and keeps recognition running. */
    private fun createRecognitionListener() = object : RecognitionListener {
        override fun onReadyForSpeech(params: Bundle?) {
            Log.d(tag, "STT ready")
        }

        override fun onBeginningOfSpeech() {
            Log.d(tag, "STT speech started")
        }

        override fun onRmsChanged(rmsdB: Float) {}

        override fun onBufferReceived(buffer: ByteArray?) {}

        override fun onEndOfSpeech() {
            Log.d(tag, "STT speech ended")
        }

        override fun onError(error: Int) {
            Log.e(tag, "STT error: $error")
            if (error == SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS) {
                // Retrying cannot succeed without the permission; avoid an endless retry loop.
                isListening = false
                return
            }
            // Restart listening on all other errors (no match, timeout, busy, ...).
            if (isListening && currentCallId != null) {
                mainHandler.postDelayed({ restartListening() }, STT_RESTART_DELAY_MS)
            }
        }

        override fun onResults(results: Bundle?) {
            val transcript = results
                ?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION)
                ?.firstOrNull()

            if (transcript != null) {
                Log.i(tag, "STT result: $transcript")
                currentCallId?.let { callId ->
                    DirectGateway.sendLog("call.audio", mapOf(
                        "callId" to callId,
                        "transcript" to transcript,
                        "isFinal" to true
                    ))
                }
            }

            // Continue listening, even when this session produced no transcript.
            restartListening()
        }

        override fun onPartialResults(partialResults: Bundle?) {
            val transcript = partialResults
                ?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION)
                ?.firstOrNull()
                ?: return

            Log.d(tag, "STT partial: $transcript")
            currentCallId?.let { callId ->
                DirectGateway.sendLog("call.audio.partial", mapOf(
                    "callId" to callId,
                    "transcript" to transcript,
                    "isFinal" to false
                ))
            }
        }

        override fun onEvent(eventType: Int, params: Bundle?) {}
    }

    private companion object {
        /** Delay before speaking the greeting after answering, to let the call connect. */
        const val GREETING_DELAY_MS = 500L

        /** Back-off before restarting the recognizer after an error. */
        const val STT_RESTART_DELAY_MS = 500L
    }
}