diff --git a/docs/SECURITY-HARDENING.md b/docs/SECURITY-HARDENING.md index f908bef..99e60c1 100644 --- a/docs/SECURITY-HARDENING.md +++ b/docs/SECURITY-HARDENING.md @@ -275,3 +275,83 @@ Internet - Mission Control listens on localhost or a private network - OpenClaw Gateway is bound to loopback only - Agent workspaces are isolated per-agent directories + +--- + +## Agent Auth: Least-Privilege Key Guidance + +### The Problem + +The global API key (`API_KEY` env var) grants full `admin` access. When agents use it, they can: +- Create/delete other agents +- Modify any task or project +- Rotate the API key itself +- Access all workspaces + +This violates least-privilege. A compromised agent session leaks admin access. + +### Recommended: Agent-Scoped Keys + +Create per-agent keys with limited scopes: + +```bash +# Create a scoped key for agent "Aegis" (via CLI) +pnpm mc raw --method POST --path /api/agents/5/keys --body '{ + "name": "aegis-worker", + "scopes": ["viewer", "agent:self", "agent:diagnostics", "tasks:write"], + "expires_in_days": 30 +}' --json +``` + +Scoped keys: +- Can only act as the agent they belong to (no cross-agent access) +- Have explicit scope lists (viewer, agent:self, tasks:write, etc.) +- Auto-expire after a set period +- Can be revoked without affecting other agents +- Are logged separately in the audit trail + +### Auth Hierarchy + +| Method | Role | Use Case | +|--------|------|----------| +| Agent-scoped key (`mca_...`) | Per-scope | Autonomous agents (recommended) | +| Global API key | admin | Admin scripts, CI/CD, initial setup | +| Session cookie | Per-user role | Human operators via web UI | +| Proxy header | Per-user role | SSO/gateway-authenticated users | + +### Monitoring Global Key Usage + +Mission Control logs a security event (`global_api_key_used`) every time the global API key is used. 
Monitor these in the audit log: + +```bash +pnpm mc raw --method GET --path '/api/security-audit?event_type=global_api_key_used&timeframe=day' --json +``` + +Goal: drive global key usage to zero in production by replacing with scoped agent keys. + +### Rate Limiting by Agent Identity + +Agent-facing endpoints use per-agent rate limiters (keyed by `x-agent-name` header): +- Heartbeat: 30/min per agent +- Task polling: 20/min per agent +- Self-registration: 5/min per IP + +This prevents a runaway agent from consuming the entire rate limit budget. + +--- + +## Rate Limit Backend Strategy + +Current: in-memory `Map` per process (suitable for single-instance deployments). + +For multi-instance deployments, the rate limiter supports a pluggable backend via the `createRateLimiter` factory. Future options: +- **Redis**: shared state across instances (use Upstash or self-hosted) +- **SQLite WAL**: leverage the existing DB for cross-process coordination +- **Edge KV**: for edge-deployed instances + +The current implementation includes: +- Periodic cleanup (60s interval) +- Capacity-bounded maps (default 10K entries, LRU eviction) +- Trusted proxy IP parsing (`MC_TRUSTED_PROXIES`) + +No action needed for single-instance deployments. For multi-instance, implement a custom `RateLimitStore` interface when scaling beyond 1 node. diff --git a/src/lib/auth.ts b/src/lib/auth.ts index e49aa75..3fb441d 100644 --- a/src/lib/auth.ts +++ b/src/lib/auth.ts @@ -423,6 +423,20 @@ export function getUserFromRequest(request: Request): User | null { const configuredApiKey = resolveActiveApiKey() if (configuredApiKey && apiKey && safeCompare(apiKey, configuredApiKey)) { + // FR-D2: Log warning when global admin API key is used. + // Prefer agent-scoped keys (POST /api/agents/{id}/keys) for least-privilege access. 
+ try { + logSecurityEvent({ + event_type: 'global_api_key_used', + severity: 'info', + source: 'auth', + agent_name: agentName || undefined, + detail: JSON.stringify({ hint: 'Consider using agent-scoped API keys for least-privilege access' }), + ip_address: request.headers.get('x-real-ip') || 'unknown', + workspace_id: getDefaultWorkspaceContext().workspaceId, + tenant_id: getDefaultWorkspaceContext().tenantId, + }) + } catch { /* startup race */ } return { id: 0, username: 'api', diff --git a/src/lib/migrations.ts b/src/lib/migrations.ts index b3a893e..07b97d8 100644 --- a/src/lib/migrations.ts +++ b/src/lib/migrations.ts @@ -1268,6 +1268,32 @@ const migrations: Migration[] = [ up(db: Database.Database) { db.exec(`ALTER TABLE agents ADD COLUMN hidden INTEGER NOT NULL DEFAULT 0`) } + }, + { + id: '043_spawn_history', + up(db: Database.Database) { + db.exec([ + `CREATE TABLE IF NOT EXISTS spawn_history (`, + ` id INTEGER PRIMARY KEY AUTOINCREMENT,`, + ` agent_id INTEGER,`, + ` agent_name TEXT NOT NULL,`, + ` spawn_type TEXT NOT NULL DEFAULT 'claude-code',`, + ` session_id TEXT,`, + ` trigger TEXT,`, + ` status TEXT NOT NULL DEFAULT 'started',`, + ` exit_code INTEGER,`, + ` error TEXT,`, + ` duration_ms INTEGER,`, + ` workspace_id INTEGER NOT NULL DEFAULT 1,`, + ` created_at INTEGER NOT NULL DEFAULT (unixepoch()),`, + ` finished_at INTEGER,`, + ` FOREIGN KEY (agent_id) REFERENCES agents(id) ON DELETE SET NULL`, + `)`, + ].join('\n')) + db.exec(`CREATE INDEX IF NOT EXISTS idx_spawn_history_agent ON spawn_history(agent_name)`) + db.exec(`CREATE INDEX IF NOT EXISTS idx_spawn_history_created ON spawn_history(created_at)`) + db.exec(`CREATE INDEX IF NOT EXISTS idx_spawn_history_status ON spawn_history(status)`) + } } ] diff --git a/src/lib/spawn-history.ts b/src/lib/spawn-history.ts new file mode 100644 index 0000000..ab719f7 --- /dev/null +++ b/src/lib/spawn-history.ts @@ -0,0 +1,135 @@ +/** + * Spawn History — durable persistence for agent spawn events. 
+ *
+ * Replaces log-scraping fallback with DB-backed spawn tracking.
+ * Every agent session spawn (claude-code, codex-cli, hermes) is recorded
+ * with status, duration, and error details for diagnostics and attribution.
+ *
+ * Timestamps are Unix epoch seconds: `finished_at` is written with SQLite's
+ * `unixepoch()` and look-back windows are computed from `Date.now() / 1000`.
+ */
+
+import { getDatabase } from '@/lib/db'
+
+/** Workspace used when a caller does not pass `workspaceId`. */
+const DEFAULT_WORKSPACE_ID = 1
+/** Look-back window (hours) used when `hours` is omitted or not a finite number. */
+const DEFAULT_WINDOW_HOURS = 24
+/** Row cap used when `limit` is omitted, negative, or not a finite number. */
+const DEFAULT_HISTORY_LIMIT = 50
+
+/** One row of the `spawn_history` table, as returned by `SELECT *`. */
+export interface SpawnRecord {
+  id: number
+  agent_id: number | null
+  agent_name: string
+  spawn_type: string
+  session_id: string | null
+  trigger: string | null
+  status: string
+  exit_code: number | null
+  error: string | null
+  duration_ms: number | null
+  workspace_id: number
+  created_at: number
+  finished_at: number | null
+}
+
+/** Shape of the single aggregate row read by getSpawnStats. */
+interface SpawnTotalsRow {
+  total: number
+  // SUM/AVG yield NULL when no row matches the filter.
+  completed: number | null
+  failed: number | null
+  avg_duration: number | null
+}
+
+/** Shape of one per-agent aggregate row read by getSpawnStats. */
+interface SpawnAgentRow {
+  agent_name: string
+  count: number
+  failures: number
+}
+
+/** Epoch-seconds lower bound for a look-back window of `hours`. */
+function windowStart(hours?: number): number {
+  // A NaN/Infinity bound would silently match nothing (or everything), so
+  // anything that is not a finite number falls back to the default window.
+  const span = typeof hours === 'number' && Number.isFinite(hours) ? hours : DEFAULT_WINDOW_HOURS
+  return Math.floor(Date.now() / 1000) - span * 3600
+}
+
+/** Turn a caller-supplied `limit` into a non-negative integer row cap. */
+function normalizeLimit(limit?: number): number {
+  // SQLite treats a negative LIMIT as "no limit" and rejects non-integer
+  // values, so invalid input uses the default and fractions are truncated.
+  if (typeof limit !== 'number' || !Number.isFinite(limit) || limit < 0) {
+    return DEFAULT_HISTORY_LIMIT
+  }
+  return Math.trunc(limit)
+}
+
+/**
+ * Insert a `started` row for a new agent spawn.
+ *
+ * @param input - Spawn attributes. `spawnType` defaults to `'claude-code'`,
+ *   `workspaceId` to 1; other omitted optional fields are stored as NULL.
+ * @returns The id of the inserted row; pass it to `recordSpawnFinish`.
+ */
+export function recordSpawnStart(input: {
+  agentName: string
+  agentId?: number
+  spawnType?: string
+  sessionId?: string
+  trigger?: string
+  workspaceId?: number
+}): number {
+  const db = getDatabase()
+  const result = db.prepare(`
+    INSERT INTO spawn_history (agent_name, agent_id, spawn_type, session_id, trigger, status, workspace_id)
+    VALUES (?, ?, ?, ?, ?, 'started', ?)
+  `).run(
+    input.agentName,
+    input.agentId ?? null,
+    input.spawnType ?? 'claude-code',
+    input.sessionId ?? null,
+    input.trigger ?? null,
+    input.workspaceId ?? DEFAULT_WORKSPACE_ID,
+  )
+  // The driver may report the rowid as a bigint; convert instead of casting so
+  // the declared `number` return type is also true at runtime.
+  return Number(result.lastInsertRowid)
+}
+
+/**
+ * Mark a spawn as finished and store its outcome.
+ *
+ * Updates the row with the given id and stamps `finished_at` with the
+ * database's current epoch time. If no row has that id the statement
+ * matches nothing and the call is a no-op.
+ *
+ * @param id - Row id returned by `recordSpawnStart`.
+ * @param input - Final status plus optional exit code, error text and
+ *   duration; omitted optional fields are stored as NULL.
+ */
+export function recordSpawnFinish(id: number, input: {
+  status: 'completed' | 'failed' | 'terminated'
+  exitCode?: number
+  error?: string
+  durationMs?: number
+}): void {
+  const db = getDatabase()
+  db.prepare(`
+    UPDATE spawn_history
+    SET status = ?, exit_code = ?, error = ?, duration_ms = ?, finished_at = unixepoch()
+    WHERE id = ?
+  `).run(
+    input.status,
+    input.exitCode ?? null,
+    input.error ?? null,
+    input.durationMs ?? null,
+    id,
+  )
+}
+
+/**
+ * List recent spawns for one agent, newest first.
+ *
+ * @param agentName - Value matched against `agent_name`.
+ * @param opts - `hours`: look-back window (default 24); `limit`: maximum
+ *   rows (default 50); `workspaceId`: workspace filter (default 1).
+ * @returns Matching rows ordered by `created_at` descending, then `id` descending.
+ */
+export function getSpawnHistory(agentName: string, opts?: {
+  hours?: number
+  limit?: number
+  workspaceId?: number
+}): SpawnRecord[] {
+  const db = getDatabase()
+  const since = windowStart(opts?.hours)
+  const limit = normalizeLimit(opts?.limit)
+
+  // created_at has one-second resolution, so id breaks ties and keeps the
+  // order of spawns started within the same second deterministic.
+  return db.prepare(`
+    SELECT * FROM spawn_history
+    WHERE agent_name = ? AND workspace_id = ? AND created_at > ?
+    ORDER BY created_at DESC, id DESC
+    LIMIT ?
+  `).all(agentName, opts?.workspaceId ?? DEFAULT_WORKSPACE_ID, since, limit) as SpawnRecord[]
+}
+
+/**
+ * Aggregate spawn counts for a workspace over a look-back window.
+ *
+ * @param opts - `hours`: look-back window (default 24); `workspaceId`:
+ *   workspace filter (default 1).
+ * @returns Overall totals, the rounded mean of non-NULL `duration_ms`
+ *   values (0 when there are none), and per-agent counts sorted by count
+ *   descending.
+ */
+export function getSpawnStats(opts?: {
+  hours?: number
+  workspaceId?: number
+}): {
+  total: number
+  completed: number
+  failed: number
+  avgDurationMs: number
+  byAgent: Array<{ agent_name: string; count: number; failures: number }>
+} {
+  const db = getDatabase()
+  const since = windowStart(opts?.hours)
+  const wsId = opts?.workspaceId ?? DEFAULT_WORKSPACE_ID
+
+  const totals = db.prepare(`
+    SELECT
+      COUNT(*) as total,
+      SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END) as completed,
+      SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END) as failed,
+      AVG(duration_ms) as avg_duration
+    FROM spawn_history
+    WHERE workspace_id = ? AND created_at > ?
+  `).get(wsId, since) as SpawnTotalsRow | undefined
+
+  const byAgent = db.prepare(`
+    SELECT
+      agent_name,
+      COUNT(*) as count,
+      SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END) as failures
+    FROM spawn_history
+    WHERE workspace_id = ? AND created_at > ?
+    GROUP BY agent_name
+    ORDER BY count DESC
+  `).all(wsId, since) as SpawnAgentRow[]
+
+  return {
+    total: totals?.total ?? 0,
+    completed: totals?.completed ?? 0,
+    failed: totals?.failed ?? 0,
+    avgDurationMs: Math.round(totals?.avg_duration ?? 0),
+    byAgent: byAgent.map((row) => ({
+      agent_name: row.agent_name,
+      count: row.count,
+      failures: row.failures,
+    })),
+  }
+}
diff --git a/tests/mcp-server.spec.ts b/tests/mcp-server.spec.ts
index 7237cd2..0fed81d 100644
--- a/tests/mcp-server.spec.ts
+++ b/tests/mcp-server.spec.ts
@@ -58,7 +58,7 @@ async function mcpRequest(method: string, params: object = {}, id = 1): Promise<
 }
 
 /** Call an MCP tool and return the parsed content */
-async function mcpTool(name: string, args: object = {}): Promise<{ content: string; isError?: boolean }> {
+async function mcpTool(name: string, args: object = {}): Promise<{ content: any; isError?: boolean }> {
   const response = await mcpRequest('tools/call', { name, arguments: args }, 99)
   const text = response?.result?.content?.[0]?.text || ''
   let parsed: any