mission-control/scripts/agent-heartbeat.sh

241 lines
7.0 KiB
Bash

#!/bin/bash
# Mission Control Phase 3: Agent Heartbeat Script
# Called by OpenClaw cron every 15 minutes to wake agents and check for work
#
# Usage:
# scripts/agent-heartbeat.sh [agent_name]
#
# If no agent specified, checks all agents with session keys
set -e
# Configuration
MISSION_CONTROL_URL="${MISSION_CONTROL_URL:-http://localhost:3000}"
LOG_DIR="${LOG_DIR:-$HOME/.mission-control/logs}"
LOG_FILE="$LOG_DIR/agent-heartbeat-$(date +%Y-%m-%d).log"
MAX_CONCURRENT=3 # Max agents to check concurrently
OPENCLAW_CMD="${OPENCLAW_CMD:-openclaw}"
# Ensure log directory exists
mkdir -p "$LOG_DIR"
# Logging function
log() {
local level="$1"
shift
echo "[$(date '+%Y-%m-%d %H:%M:%S')] [$level] $*" | tee -a "$LOG_FILE"
}
# Check if Mission Control is running
check_mission_control() {
if ! curl -s "$MISSION_CONTROL_URL/api/status" > /dev/null 2>&1; then
log "ERROR" "Mission Control not accessible at $MISSION_CONTROL_URL"
return 1
fi
return 0
}
# Check heartbeat for specific agent
check_agent_heartbeat() {
local agent_name="$1"
local agent_id="$2"
log "INFO" "Checking heartbeat for agent: $agent_name"
# Call heartbeat endpoint
local response
response=$(curl -s -w "HTTP_STATUS:%{http_code}" "$MISSION_CONTROL_URL/api/agents/$agent_id/heartbeat" 2>/dev/null)
local http_code
http_code=$(echo "$response" | grep -o "HTTP_STATUS:[0-9]*" | cut -d: -f2)
local body
body=$(echo "$response" | sed 's/HTTP_STATUS:[0-9]*$//')
if [[ "$http_code" != "200" ]]; then
log "ERROR" "Heartbeat failed for $agent_name: HTTP $http_code"
return 1
fi
# Parse response
local status
status=$(echo "$body" | jq -r '.status // "unknown"' 2>/dev/null || echo "parse_error")
if [[ "$status" == "HEARTBEAT_OK" ]]; then
log "INFO" "Agent $agent_name: No work items found"
return 0
elif [[ "$status" == "WORK_ITEMS_FOUND" ]]; then
local total_items
total_items=$(echo "$body" | jq -r '.total_items // 0' 2>/dev/null || echo "0")
log "INFO" "Agent $agent_name: Found $total_items work items"
# If work items found and agent has session key, send wake notification
local session_key
session_key=$(get_agent_session_key "$agent_name")
if [[ -n "$session_key" && "$session_key" != "null" ]]; then
send_wake_notification "$agent_name" "$session_key" "$total_items" "$body"
else
log "WARN" "Agent $agent_name has work items but no session key configured"
fi
return 0
else
log "ERROR" "Unexpected heartbeat response for $agent_name: $status"
return 1
fi
}
# Get agent session key from database
get_agent_session_key() {
local agent_name="$1"
# Query agents API to get session key
local agent_data
agent_data=$(curl -s "$MISSION_CONTROL_URL/api/agents?limit=100" 2>/dev/null | jq -r ".agents[] | select(.name == \"$agent_name\") | .session_key" 2>/dev/null || echo "")
echo "$agent_data"
}
# Send wake notification to agent session
send_wake_notification() {
local agent_name="$1"
local session_key="$2"
local work_items_count="$3"
local heartbeat_data="$4"
log "INFO" "Sending wake notification to $agent_name (session: $session_key)"
# Format wake message
local wake_message="🤖 **Mission Control Heartbeat**\n\n"
wake_message+="Agent: $agent_name\n"
wake_message+="Work items found: $work_items_count\n\n"
wake_message+="🔔 You have notifications or tasks that need attention.\n"
wake_message+="Use Mission Control to view details: $MISSION_CONTROL_URL\n\n"
wake_message+="$(date '+%Y-%m-%d %H:%M:%S')"
# Send via OpenClaw sessions_send
if "$OPENCLAW_CMD" gateway sessions_send --session "$session_key" --message "$wake_message" >> "$LOG_FILE" 2>&1; then
log "INFO" "Wake notification sent successfully to $agent_name"
else
log "ERROR" "Failed to send wake notification to $agent_name"
fi
}
# Get list of agents to check
get_agents_to_check() {
local filter_agent="$1"
if [[ -n "$filter_agent" ]]; then
# Check specific agent
echo "$filter_agent"
return
fi
# Get all agents with session keys
curl -s "$MISSION_CONTROL_URL/api/agents?limit=100" 2>/dev/null | \
jq -r '.agents[] | select(.session_key != null and .session_key != "") | .name' 2>/dev/null || \
echo ""
}
# Main execution
main() {
local target_agent="$1"
log "INFO" "Starting agent heartbeat check (PID: $$)"
# Check if Mission Control is running
if ! check_mission_control; then
log "ERROR" "Aborting: Mission Control not accessible"
exit 1
fi
# Get agents to check
local agents
agents=$(get_agents_to_check "$target_agent")
if [[ -z "$agents" ]]; then
log "WARN" "No agents found with session keys configured"
exit 0
fi
local total_agents
total_agents=$(echo "$agents" | wc -l)
log "INFO" "Checking heartbeat for $total_agents agent(s)"
# Process agents (limit concurrency)
local processed=0
local successful=0
local failed=0
local pids=()
while IFS= read -r agent_name; do
[[ -z "$agent_name" ]] && continue
# Wait if we've reached max concurrent processes
while [[ ${#pids[@]} -ge $MAX_CONCURRENT ]]; do
for i in "${!pids[@]}"; do
if ! kill -0 "${pids[$i]}" 2>/dev/null; then
unset "pids[$i]"
fi
done
pids=("${pids[@]}") # Reindex array
if [[ ${#pids[@]} -ge $MAX_CONCURRENT ]]; then
sleep 1
fi
done
# Start heartbeat check in background
(
if check_agent_heartbeat "$agent_name" "$agent_name"; then
echo "SUCCESS:$agent_name"
else
echo "FAILED:$agent_name"
fi
) &
pids+=($!)
((processed++))
done <<< "$agents"
# Wait for all background processes
for pid in "${pids[@]}"; do
if wait "$pid"; then
((successful++))
else
((failed++))
fi
done
log "INFO" "Heartbeat check completed: $processed processed, $successful successful, $failed failed"
# Return appropriate exit code
if [[ $failed -gt 0 ]]; then
exit 1
else
exit 0
fi
}
# Handle script arguments
case "${1:-}" in
--help|-h)
echo "Mission Control Agent Heartbeat Script"
echo ""
echo "Usage: $0 [agent_name]"
echo ""
echo "Options:"
echo " agent_name Check specific agent only"
echo " --help, -h Show this help message"
echo ""
echo "Environment variables:"
echo " MISSION_CONTROL_URL Mission Control base URL (default: http://localhost:3005)"
echo ""
exit 0
;;
*)
main "$@"
;;
esac