feat: platform hardening — spawn history, auth warnings, security docs

FR-D1: Add least-privilege auth guidance to SECURITY-HARDENING.md
  - Agent-scoped keys vs global API key comparison
  - Auth hierarchy table (scoped key > global key > session > proxy)
  - CLI examples for creating scoped keys
  - Monitoring guidance for global key usage

FR-D2: Log security event when global admin API key is used
  - Emits 'global_api_key_used' event to audit trail
  - Hints toward agent-scoped keys for least-privilege

FR-D3: Add durable spawn history persistence
  - New migration 043_spawn_history with indexed table
  - spawn-history.ts with recordSpawnStart/Finish, getSpawnHistory,
    getSpawnStats functions
  - Replaces log-scraping fallback with DB-backed tracking

FR-D4: Document rate-limit backend strategy
  - Current in-memory Map approach documented
  - Pluggable backend plan for multi-instance (Redis, SQLite WAL)
  - Per-agent rate limiter details documented

Also fixes MCP test type annotation (content: string → any).
This commit is contained in:
Nyk 2026-03-21 21:52:12 +07:00
parent 06cfb3d9db
commit f12aac13c3
5 changed files with 256 additions and 1 deletions

View File

@ -275,3 +275,83 @@ Internet
- Mission Control listens on localhost or a private network
- OpenClaw Gateway is bound to loopback only
- Agent workspaces are isolated per-agent directories
---
## Agent Auth: Least-Privilege Key Guidance
### The Problem
The global API key (`API_KEY` env var) grants full `admin` access. When agents use it, they can:
- Create/delete other agents
- Modify any task or project
- Rotate the API key itself
- Access all workspaces
This violates the principle of least privilege: a compromised agent session leaks full admin access.
### Recommended: Agent-Scoped Keys
Create per-agent keys with limited scopes:
```bash
# Create a scoped key for agent "Aegis" (via CLI)
pnpm mc raw --method POST --path /api/agents/5/keys --body '{
"name": "aegis-worker",
"scopes": ["viewer", "agent:self", "agent:diagnostics", "tasks:write"],
"expires_in_days": 30
}' --json
```
Scoped keys:
- Can only act as the agent they belong to (no cross-agent access)
- Have explicit scope lists (viewer, agent:self, tasks:write, etc.)
- Auto-expire after a set period
- Can be revoked without affecting other agents
- Are logged separately in the audit trail
### Auth Hierarchy
| Method | Role | Use Case |
|--------|------|----------|
| Agent-scoped key (`mca_...`) | Per-scope | Autonomous agents (recommended) |
| Global API key | admin | Admin scripts, CI/CD, initial setup |
| Session cookie | Per-user role | Human operators via web UI |
| Proxy header | Per-user role | SSO/gateway-authenticated users |
### Monitoring Global Key Usage
Mission Control logs a security event (`global_api_key_used`) every time the global API key is used. Monitor these in the audit log:
```bash
pnpm mc raw --method GET --path '/api/security-audit?event_type=global_api_key_used&timeframe=day' --json
```
Goal: drive global key usage to zero in production by replacing it with agent-scoped keys.
### Rate Limiting by Agent Identity
Agent-facing endpoints use per-agent rate limiters (keyed by `x-agent-name` header):
- Heartbeat: 30/min per agent
- Task polling: 20/min per agent
- Self-registration: 5/min per IP
This prevents a runaway agent from consuming the entire rate limit budget.
---
## Rate Limit Backend Strategy
Current: in-memory `Map` per process (suitable for single-instance deployments).
For multi-instance deployments, the rate limiter supports a pluggable backend via the `createRateLimiter` factory. Future options:
- **Redis**: shared state across instances (use Upstash or self-hosted)
- **SQLite WAL**: leverage the existing DB for cross-process coordination
- **Edge KV**: for edge-deployed instances
The current implementation includes:
- Periodic cleanup (60s interval)
- Capacity-bounded maps (default 10K entries, LRU eviction)
- Trusted proxy IP parsing (`MC_TRUSTED_PROXIES`)
No action is needed for single-instance deployments. When scaling beyond one node, provide a custom implementation of the `RateLimitStore` interface.

View File

@ -423,6 +423,20 @@ export function getUserFromRequest(request: Request): User | null {
const configuredApiKey = resolveActiveApiKey() const configuredApiKey = resolveActiveApiKey()
if (configuredApiKey && apiKey && safeCompare(apiKey, configuredApiKey)) { if (configuredApiKey && apiKey && safeCompare(apiKey, configuredApiKey)) {
// FR-D2: Log warning when global admin API key is used.
// Prefer agent-scoped keys (POST /api/agents/{id}/keys) for least-privilege access.
try {
logSecurityEvent({
event_type: 'global_api_key_used',
severity: 'info',
source: 'auth',
agent_name: agentName || undefined,
detail: JSON.stringify({ hint: 'Consider using agent-scoped API keys for least-privilege access' }),
ip_address: request.headers.get('x-real-ip') || 'unknown',
workspace_id: getDefaultWorkspaceContext().workspaceId,
tenant_id: getDefaultWorkspaceContext().tenantId,
})
} catch { /* startup race */ }
return { return {
id: 0, id: 0,
username: 'api', username: 'api',

View File

@ -1268,6 +1268,32 @@ const migrations: Migration[] = [
up(db: Database.Database) { up(db: Database.Database) {
db.exec(`ALTER TABLE agents ADD COLUMN hidden INTEGER NOT NULL DEFAULT 0`) db.exec(`ALTER TABLE agents ADD COLUMN hidden INTEGER NOT NULL DEFAULT 0`)
} }
},
{
id: '043_spawn_history',
up(db: Database.Database) {
db.exec([
`CREATE TABLE IF NOT EXISTS spawn_history (`,
` id INTEGER PRIMARY KEY AUTOINCREMENT,`,
` agent_id INTEGER,`,
` agent_name TEXT NOT NULL,`,
` spawn_type TEXT NOT NULL DEFAULT 'claude-code',`,
` session_id TEXT,`,
` trigger TEXT,`,
` status TEXT NOT NULL DEFAULT 'started',`,
` exit_code INTEGER,`,
` error TEXT,`,
` duration_ms INTEGER,`,
` workspace_id INTEGER NOT NULL DEFAULT 1,`,
` created_at INTEGER NOT NULL DEFAULT (unixepoch()),`,
` finished_at INTEGER,`,
` FOREIGN KEY (agent_id) REFERENCES agents(id) ON DELETE SET NULL`,
`)`,
].join('\n'))
db.exec(`CREATE INDEX IF NOT EXISTS idx_spawn_history_agent ON spawn_history(agent_name)`)
db.exec(`CREATE INDEX IF NOT EXISTS idx_spawn_history_created ON spawn_history(created_at)`)
db.exec(`CREATE INDEX IF NOT EXISTS idx_spawn_history_status ON spawn_history(status)`)
}
} }
] ]

135
src/lib/spawn-history.ts Normal file
View File

@ -0,0 +1,135 @@
/**
* Spawn History — durable persistence for agent spawn events.
*
* Replaces log-scraping fallback with DB-backed spawn tracking.
* Every agent session spawn (claude-code, codex-cli, hermes) is recorded
* with status, duration, and error details for diagnostics and attribution.
*/
import { getDatabase } from '@/lib/db'
/**
 * One row of the `spawn_history` table, as returned by `getSpawnHistory`.
 *
 * A row is inserted by `recordSpawnStart` and later completed in place by
 * `recordSpawnFinish`; fields that only the finish step writes stay `null`
 * until then.
 */
export interface SpawnRecord {
/** Row id; returned by `recordSpawnStart` and used by `recordSpawnFinish` to locate the row. */
id: number
/** Id of the owning agent, or `null` when the spawn was recorded without one. */
agent_id: number | null
/** Agent name; the key `getSpawnHistory` filters on. */
agent_name: string
/** Kind of spawn; `recordSpawnStart` writes `'claude-code'` when the caller omits it. */
spawn_type: string
/** Session identifier supplied at start, or `null` when omitted. */
session_id: string | null
/** Trigger value supplied at start, or `null` when omitted. */
trigger: string | null
/** `'started'` on insert; `recordSpawnFinish` sets `'completed'`, `'failed'` or `'terminated'`. */
status: string
/** Exit code written by `recordSpawnFinish`; `null` if not supplied or not yet finished. */
exit_code: number | null
/** Error text written by `recordSpawnFinish`; `null` if not supplied or not yet finished. */
error: string | null
/** Duration in milliseconds written by `recordSpawnFinish`; `null` if not supplied or not yet finished. */
duration_ms: number | null
/** Workspace the spawn belongs to; `recordSpawnStart` writes 1 when the caller omits it. */
workspace_id: number
/** Creation time in Unix seconds (column default `unixepoch()`). */
created_at: number
/** Finish time in Unix seconds, stamped by `recordSpawnFinish`; `null` until then. */
finished_at: number | null
}
/**
 * Inserts a new `spawn_history` row with status `'started'`.
 *
 * Optional fields that are omitted are stored as: `agentId` → NULL,
 * `spawnType` → `'claude-code'`, `sessionId` → NULL, `trigger` → NULL,
 * `workspaceId` → 1.
 *
 * @param input - Identity and context of the spawn being started.
 * @returns The id of the inserted row, always as a JavaScript `number`;
 *   pass it to `recordSpawnFinish` when the spawn ends.
 */
export function recordSpawnStart(input: {
  agentName: string
  agentId?: number
  spawnType?: string
  sessionId?: string
  trigger?: string
  workspaceId?: number
}): number {
  const db = getDatabase()
  const result = db.prepare(`
    INSERT INTO spawn_history (agent_name, agent_id, spawn_type, session_id, trigger, status, workspace_id)
    VALUES (?, ?, ?, ?, ?, 'started', ?)
  `).run(
    input.agentName,
    input.agentId ?? null,
    input.spawnType ?? 'claude-code',
    input.sessionId ?? null,
    input.trigger ?? null,
    input.workspaceId ?? 1,
  )
  // The driver types lastInsertRowid as `number | bigint`. Convert instead of
  // asserting, so a bigint can never escape under the declared `number` type.
  return Number(result.lastInsertRowid)
}
/**
 * Completes a spawn record previously created by `recordSpawnStart`.
 *
 * Writes the final status together with exit code, error text and duration,
 * and stamps `finished_at` with the database's `unixepoch()`. Optional fields
 * that are omitted are written as NULL. The outcome of the UPDATE is not
 * inspected, so an id that matches no row is not reported to the caller.
 *
 * @param id - Row id returned by `recordSpawnStart`.
 * @param input - Final outcome of the spawn.
 */
export function recordSpawnFinish(id: number, input: {
  status: 'completed' | 'failed' | 'terminated'
  exitCode?: number
  error?: string
  durationMs?: number
}): void {
  const finishStatement = getDatabase().prepare(`
UPDATE spawn_history
SET status = ?, exit_code = ?, error = ?, duration_ms = ?, finished_at = unixepoch()
WHERE id = ?
`)

  // Bind order follows the placeholders: the four SET columns, then the id.
  const bindings = [
    input.status,
    input.exitCode ?? null,
    input.error ?? null,
    input.durationMs ?? null,
    id,
  ]
  finishStatement.run(...bindings)
}
/**
 * Returns recent spawn records for one agent, newest first.
 *
 * @param agentName - Value matched against `spawn_history.agent_name`.
 * @param opts - Optional filters: `hours` is the look-back window
 *   (default 24), `limit` caps the number of rows (default 50) and
 *   `workspaceId` selects the workspace (default 1).
 * @returns Rows created strictly after the window start, ordered by
 *   `created_at` descending and then `id` descending.
 */
export function getSpawnHistory(agentName: string, opts?: {
  hours?: number
  limit?: number
  workspaceId?: number
}): SpawnRecord[] {
  const db = getDatabase()
  const hours = opts?.hours ?? 24
  const limit = opts?.limit ?? 50
  // created_at holds Unix seconds, so the cutoff is computed in seconds too.
  const since = Math.floor(Date.now() / 1000) - hours * 3600
  // created_at has one-second resolution. Without the id tiebreaker, spawns
  // recorded in the same second come back in an unspecified order, which also
  // makes the LIMIT cut-off unstable between calls.
  return db.prepare(`
    SELECT * FROM spawn_history
    WHERE agent_name = ? AND workspace_id = ? AND created_at > ?
    ORDER BY created_at DESC, id DESC
    LIMIT ?
  `).all(agentName, opts?.workspaceId ?? 1, since, limit) as SpawnRecord[]
}
/**
 * Aggregates spawn activity for one workspace over a look-back window.
 *
 * Runs two queries over rows created strictly after the window start: one
 * for workspace-wide totals and one grouped by agent name, busiest agent
 * first.
 *
 * @param opts - Optional filters: `hours` is the look-back window
 *   (default 24) and `workspaceId` selects the workspace (default 1).
 * @returns `total` counts every row in the window; `completed` and `failed`
 *   count rows with exactly those statuses; `avgDurationMs` is the SQL `AVG`
 *   of `duration_ms` rounded to an integer. Any aggregate the query leaves
 *   empty is reported as 0. `byAgent` carries per-agent row and failure counts.
 */
export function getSpawnStats(opts?: {
  hours?: number
  workspaceId?: number
}): {
  total: number
  completed: number
  failed: number
  avgDurationMs: number
  byAgent: Array<{ agent_name: string; count: number; failures: number }>
} {
  type TotalsRow = {
    total: number | null
    completed: number | null
    failed: number | null
    avg_duration: number | null
  }
  type AgentRow = { agent_name: string; count: number; failures: number }

  const db = getDatabase()
  const windowHours = opts?.hours ?? 24
  const cutoff = Math.floor(Date.now() / 1000) - windowHours * 3600
  const workspace = opts?.workspaceId ?? 1

  const totalsQuery = db.prepare(`
SELECT
COUNT(*) as total,
SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END) as completed,
SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END) as failed,
AVG(duration_ms) as avg_duration
FROM spawn_history
WHERE workspace_id = ? AND created_at > ?
`)
  const summary = totalsQuery.get(workspace, cutoff) as TotalsRow | undefined

  const perAgentQuery = db.prepare(`
SELECT
agent_name,
COUNT(*) as count,
SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END) as failures
FROM spawn_history
WHERE workspace_id = ? AND created_at > ?
GROUP BY agent_name
ORDER BY count DESC
`)
  const agentRows = perAgentQuery.all(workspace, cutoff) as AgentRow[]

  // SUM and AVG yield NULL when the window is empty; report those as 0.
  const averageDuration = summary?.avg_duration ?? 0

  return {
    total: summary?.total ?? 0,
    completed: summary?.completed ?? 0,
    failed: summary?.failed ?? 0,
    avgDurationMs: Math.round(averageDuration),
    // Project to the documented shape so extra columns never leak out.
    byAgent: agentRows.map(({ agent_name, count, failures }) => ({ agent_name, count, failures })),
  }
}

View File

@ -58,7 +58,7 @@ async function mcpRequest(method: string, params: object = {}, id = 1): Promise<
} }
/** Call an MCP tool and return the parsed content */ /** Call an MCP tool and return the parsed content */
async function mcpTool(name: string, args: object = {}): Promise<{ content: string; isError?: boolean }> { async function mcpTool(name: string, args: object = {}): Promise<{ content: any; isError?: boolean }> {
const response = await mcpRequest('tools/call', { name, arguments: args }, 99) const response = await mcpRequest('tools/call', { name, arguments: args }, 99)
const text = response?.result?.content?.[0]?.text || '' const text = response?.result?.content?.[0]?.text || ''
let parsed: any let parsed: any