commit 7e29cdac2bc27e9059136faf0e1f83f107d46877 Author: James Date: Mon Mar 23 11:59:49 2026 -0400 Initial commit: vault1984 NOC dashboard + monitoring agent Pulled from noc.vault1984.com:/home/johan/vault1984-dashboard/ - dashboard.go: NOC status dashboard (SQLite, incidents, telemetry, AI chat) - chat.go + chat-ws.go: agent chat server - agent/: lightweight monitoring agent (CPU/disk/mem → HQ) - DEPLOYMENT-HANDOFF.md: deployment notes diff --git a/DEPLOYMENT-HANDOFF.md b/DEPLOYMENT-HANDOFF.md new file mode 100644 index 0000000..f0247e7 --- /dev/null +++ b/DEPLOYMENT-HANDOFF.md @@ -0,0 +1,105 @@ +# Vault1984 POP Deployment — Handoff for Hans + +**From:** Johan / James +**Date:** March 7, 2026 +**Status:** Binaries built, download endpoints added, ready for your rollout + +--- + +## What's ready on HQ (noc.vault1984.com) + +Two new binaries in `/home/johan/vault1984-dashboard/`: + +| File | Arch | Size | +|------|------|------| +| `vault1984` | linux/amd64 | ~18MB | +| `vault1984-arm64` | linux/arm64 | ~18MB | + +Download endpoints added to the dashboard (need rebuild): +- `http://185.218.204.47:8080/download/vault1984` +- `http://185.218.204.47:8080/download/vault1984-arm64` + +To activate: rebuild `dashboard-go` and restart the service. + +## What's new in the binary + +Built-in telemetry. When launched with these flags, the vault POSTs system + vault metrics to HQ every N seconds: + +``` +--telemetry-freq=60 +--telemetry-host=http://185.218.204.47:8080/telemetry +--telemetry-token= +``` + +Also works via env vars: `TELEMETRY_FREQ`, `TELEMETRY_HOST`, `TELEMETRY_TOKEN`. Without flags, telemetry is off — no behavior change for self-hosters. + +**Payload** (JSON POST): +```json +{ + "version": "0.1.0", + "hostname": "virginia", + "uptime_seconds": 3600, + "timestamp": "2026-03-06T10:00:00Z", + "system": { + "os": "linux", "arch": "arm64", "cpus": 2, + "cpu_percent": 12.5, + "memory_total_mb": 1024, "memory_used_mb": 340, + "disk_total_mb": 8000, "disk_used_mb": 1200, + "load_1m": 0.3 + }, + "vaults": { + "count": 0, "total_size_mb": 0, "total_entries": 0 + }, + "mode": "hosted" +} +``` + +## What needs doing + +### 1. Telemetry inbox on the dashboard + +The dashboard doesn't have a `/telemetry` handler yet. You'll want to add one that: +- Accepts the JSON payload above +- Stores it (SQLite, or just update the existing nodes table) +- Feeds into the status page + +This is your call on how to wire it in — you know the dashboard code best. + +### 2. Wipe the status DB + +Johan wants the status.db wiped clean and rebuilt with only the three live nodes: + +| Node ID | Name | Region | IP | +|---------|------|--------|----| +| `hq-zurich` | HQ — Zürich | Hostkey / CH | 185.218.204.47 | +| `virginia` | Virginia | **us-east-1** | ? | +| `singapore` | Singapore | ap-southeast-1 | 47.129.4.217 | + +**Important:** The current "virginia" POP is tagged `us-east-2` with IP `3.145.131.247` — that's **Ohio (Dublin)**. Johan does NOT want Ohio. Please confirm: +- Was this already moved to us-east-1 (actual Virginia)? +- If not, we need to spin down Ohio and deploy in us-east-1. + +The planned nodes (london, frankfurt, tokyo, etc.) can stay in the seed data as "planned" but shouldn't be in the live status rotation until deployed. + +### 3. Deploy vault1984 to the two AWS POPs + +Each POP needs: +- The vault1984 binary (arm64 for t4g.micro) +- A systemd service with telemetry flags pointing to HQ +- Port 1984 open +- `DATA_DIR` for vault storage + +You already have `deploy-pop.sh` and SSM access — adapt as you see fit. The vault1984 binary replaces nothing; it runs alongside the existing v1984-agent (or you can consolidate, since vault1984 now reports its own metrics). + +### 4. Infrastructure overview (for clarity) + +| Server | Role | Location | +|--------|------|----------| +| zurich.inou.com | Kuma, security checks, shared git | Hostkey Zürich | +| noc.vault1984.com | Dashboard, status page, marketing site, HQ | Hostkey Zürich | +| virginia POP | Vault1984 hosted node | AWS us-east-1 (confirm!) | +| singapore POP | Vault1984 hosted node | AWS ap-southeast-1 | + +--- + +Questions? Ping Johan or ask James in the next session. diff --git a/agent/disk_linux.go b/agent/disk_linux.go new file mode 100644 index 0000000..12eed15 --- /dev/null +++ b/agent/disk_linux.go @@ -0,0 +1,16 @@ +package main + +import "syscall" + +func diskUsage(path string) float64 { + var stat syscall.Statfs_t + if err := syscall.Statfs(path, &stat); err != nil { + return 0 + } + total := stat.Blocks * uint64(stat.Bsize) + free := stat.Bavail * uint64(stat.Bsize) + if total == 0 { + return 0 + } + return float64(total-free) / float64(total) +} diff --git a/agent/go.mod b/agent/go.mod new file mode 100644 index 0000000..6104c7e --- /dev/null +++ b/agent/go.mod @@ -0,0 +1,3 @@ +module vault1984-agent + +go 1.24.0 diff --git a/agent/main.go b/agent/main.go new file mode 100644 index 0000000..39c0de4 --- /dev/null +++ b/agent/main.go @@ -0,0 +1,210 @@ +package main + +import ( + "bytes" + "encoding/json" + "fmt" + "log" + "net/http" + "os" + "strconv" + "strings" + "time" +) + +// Config — override via env vars +var ( + nodeID = getenv("NODE_ID", "unknown") + hqURL = getenv("HQ_URL", "http://185.218.204.47:8080") + interval = 60 * time.Second +) + +func getenv(key, def string) string { + if v := os.Getenv(key); v != "" { + return v + } + return def +} + +// ---- /proc readers ---- + +func cpuPercent() float64 { + // Read two samples 500ms apart + s1 := readCPUStat() + time.Sleep(500 * time.Millisecond) + s2 := readCPUStat() + + idle1 := s1[3] + idle2 := s2[3] + total1, total2 := sum(s1), sum(s2) + totalDiff := total2 - total1 + idleDiff := idle2 - idle1 + if totalDiff == 0 { + return 0 + } + return float64(totalDiff-idleDiff) / float64(totalDiff) +} + +func readCPUStat() []uint64 { + data, err := os.ReadFile("/proc/stat") + if err != nil { + return make([]uint64, 10) + } + for _, line := range strings.Split(string(data), "\n") { + if !strings.HasPrefix(line, "cpu ") { + continue + } + fields := strings.Fields(line)[1:] // skip "cpu" + vals := make([]uint64, len(fields)) + for i, f := range fields { + vals[i], _ = strconv.ParseUint(f, 10, 64) + } + return vals + } + return make([]uint64, 10) +} + +func sum(vals []uint64) uint64 { + var t uint64 + for _, v := range vals { + t += v + } + return t +} + +func memPercent() float64 { + data, err := os.ReadFile("/proc/meminfo") + if err != nil { + return 0 + } + kv := map[string]uint64{} + for _, line := range strings.Split(string(data), "\n") { + parts := strings.Fields(line) + if len(parts) < 2 { + continue + } + key := strings.TrimSuffix(parts[0], ":") + val, _ := strconv.ParseUint(parts[1], 10, 64) + kv[key] = val + } + total := kv["MemTotal"] + avail := kv["MemAvailable"] + if total == 0 { + return 0 + } + return float64(total-avail) / float64(total) +} + +func diskPercent() float64 { + // Use /proc/mounts + statfs-equivalent via df output isn't available + // Read from /proc/diskstats isn't straightforward for percent + // Use /sys/fs (fallback: read statvfs via syscall) + // Simple approach: parse `df /` output via reading /proc/mounts + // Actually the cleanest without CGo: read /proc/self/mountstats or use syscall.Statfs + // We'll use a simple read of /proc/mounts and pick root + // For a CGo-free approach, parse /proc/diskstats for read/write rates instead + data, err := os.ReadFile("/proc/diskstats") + if err != nil { + return 0 + } + // Just return a simple proxy: if disk stats exist, device is healthy + _ = data + // Real disk % requires syscall.Statfs — we'll do it inline + return diskUsage("/") +} + +// ---- OTLP JSON builder ---- + +func buildOTLP(cpu, mem, disk float64) []byte { + now := fmt.Sprintf("%d", time.Now().UnixNano()) + + payload := map[string]interface{}{ + "resourceMetrics": []interface{}{ + map[string]interface{}{ + "resource": map[string]interface{}{ + "attributes": []interface{}{ + attr("service.name", "vault1984-pop"), + attr("node.id", nodeID), + attr("host.name", hostname()), + }, + }, + "scopeMetrics": []interface{}{ + map[string]interface{}{ + "scope": map[string]interface{}{ + "name": "vault1984-agent", + "version": "0.1", + }, + "metrics": []interface{}{ + gauge("system.cpu.utilization", cpu, now), + gauge("system.memory.utilization", mem, now), + gauge("system.disk.utilization", disk, now), + }, + }, + }, + }, + }, + } + b, _ := json.Marshal(payload) + return b +} + +func attr(k, v string) map[string]interface{} { + return map[string]interface{}{ + "key": k, + "value": map[string]interface{}{"stringValue": v}, + } +} + +func gauge(name string, val float64, tsNano string) map[string]interface{} { + return map[string]interface{}{ + "name": name, + "gauge": map[string]interface{}{ + "dataPoints": []interface{}{ + map[string]interface{}{ + "timeUnixNano": tsNano, + "asDouble": val, + }, + }, + }, + } +} + +func hostname() string { + h, _ := os.Hostname() + return h +} + +// ---- Push ---- + +func push(cpu, mem, disk float64) error { + body := buildOTLP(cpu, mem, disk) + + // Push OTLP to HQ + resp, err := http.Post(hqURL+"/v1/metrics", "application/json", bytes.NewReader(body)) + if err != nil { + return fmt.Errorf("OTLP push: %w", err) + } + resp.Body.Close() + if resp.StatusCode >= 300 { + return fmt.Errorf("OTLP push status: %d", resp.StatusCode) + } + + log.Printf("pushed: cpu=%.1f%% mem=%.1f%% disk=%.1f%%", cpu*100, mem*100, disk*100) + return nil +} + +func main() { + log.Printf("=== vault1984-agent node=%s hq=%s interval=%s ===", nodeID, hqURL, interval) + + for { + cpu := cpuPercent() + mem := memPercent() + disk := diskPercent() + + if err := push(cpu, mem, disk); err != nil { + log.Printf("push error: %v", err) + } + + time.Sleep(interval) + } +} diff --git a/chat-ws.go b/chat-ws.go new file mode 100644 index 0000000..6255266 --- /dev/null +++ b/chat-ws.go @@ -0,0 +1,533 @@ +package main + +import ( + "encoding/json" + "fmt" + "log" + "net/http" + "strings" + "sync" + "time" + + "github.com/gorilla/websocket" +) + +const ( + listenAddr = "100.85.192.60:1985" + gatewayURL = "ws://127.0.0.1:18789/" + gatewayToken = "601267edaccf8cd3d6afe222c3ce63602e210ff1ecc9a268" + sessionKey = "agent:main:main" +) + +// --------------------------------------------------------------------------- +// Gateway connection +// --------------------------------------------------------------------------- + +type Gateway struct { + mu sync.Mutex + conn *websocket.Conn + connected bool + // Pending requests: reqId/runId → channel that receives gateway frames + pending map[string]chan map[string]interface{} + pendingMu sync.Mutex +} + +var gw = &Gateway{pending: make(map[string]chan map[string]interface{})} + +func (g *Gateway) register(key string) chan map[string]interface{} { + ch := make(chan map[string]interface{}, 128) + g.pendingMu.Lock() + g.pending[key] = ch + g.pendingMu.Unlock() + return ch +} + +func (g *Gateway) unregister(key string) { + g.pendingMu.Lock() + delete(g.pending, key) + g.pendingMu.Unlock() +} + +func (g *Gateway) dispatch(key string, msg map[string]interface{}) bool { + g.pendingMu.Lock() + ch, ok := g.pending[key] + g.pendingMu.Unlock() + if ok { + select { + case ch <- msg: + default: // drop if full + } + } + return ok +} + +func (g *Gateway) send(msg interface{}) error { + g.mu.Lock() + defer g.mu.Unlock() + if g.conn == nil { + return fmt.Errorf("not connected") + } + return g.conn.WriteMessage(websocket.TextMessage, mustJSON(msg)) +} + +// connect dials the gateway, performs the challenge/connect handshake, +// then enters a read loop that dispatches incoming frames. +func (g *Gateway) connect() { + for { + conn, _, err := websocket.DefaultDialer.Dial(gatewayURL, nil) + if err != nil { + log.Printf("[gw] dial: %v", err) + time.Sleep(3 * time.Second) + continue + } + + // 1. Read challenge + var challenge map[string]interface{} + if err := conn.ReadJSON(&challenge); err != nil { + log.Printf("[gw] challenge read: %v", err) + conn.Close() + time.Sleep(3 * time.Second) + continue + } + + // 2. Send connect request + connectReq := map[string]interface{}{ + "type": "req", + "id": fmt.Sprintf("conn-%d", time.Now().UnixMilli()), + "method": "connect", + "params": map[string]interface{}{ + "minProtocol": 3, + "maxProtocol": 3, + "client": map[string]interface{}{ + "id": "vault1984-chat", + "version": "2026.3.2", + "platform": "linux", + "mode": "cli", + }, + "auth": map[string]string{"token": gatewayToken}, + "scopes": []string{"operator.admin"}, + }, + } + conn.WriteJSON(connectReq) + + // 3. Read connect response + conn.SetReadDeadline(time.Now().Add(10 * time.Second)) + var resp map[string]interface{} + if err := conn.ReadJSON(&resp); err != nil || resp["ok"] != true { + log.Printf("[gw] handshake failed: %v resp=%v", err, resp) + conn.Close() + time.Sleep(3 * time.Second) + continue + } + + log.Println("[gw] connected") + g.mu.Lock() + g.conn = conn + g.connected = true + g.mu.Unlock() + + // Keep-alive pinger + go func() { + for g.connected { + time.Sleep(25 * time.Second) + g.mu.Lock() + if g.conn != nil { + g.conn.WriteMessage(websocket.PingMessage, nil) + } + g.mu.Unlock() + } + }() + + // 4. Read loop — route frames to waiting callers + for { + conn.SetReadDeadline(time.Now().Add(90 * time.Second)) + var frame map[string]interface{} + if err := conn.ReadJSON(&frame); err != nil { + log.Printf("[gw] read: %v", err) + break + } + + switch frame["type"] { + case "res": + // Response to a request — dispatch by id + if id, _ := frame["id"].(string); id != "" { + g.dispatch(id, frame) + } + + case "event": + event, _ := frame["event"].(string) + if event == "chat" { + if p, ok := frame["payload"].(map[string]interface{}); ok { + if runId, _ := p["runId"].(string); runId != "" { + g.dispatch(runId, p) + } + } + } + // Ignore health, tick, presence, etc. + } + } + + // Disconnected — reset and retry + g.mu.Lock() + g.conn = nil + g.connected = false + g.mu.Unlock() + log.Println("[gw] disconnected, reconnecting...") + time.Sleep(2 * time.Second) + } +} + +// Chat sends a message to the agent and collects the streamed response. +func (g *Gateway) Chat(text string) (string, error) { + reqId := fmt.Sprintf("req-%d", time.Now().UnixMilli()) + + // Register to receive the ack (keyed by reqId) + ch := g.register(reqId) + defer g.unregister(reqId) + + // Send chat.send + err := g.send(map[string]interface{}{ + "type": "req", + "id": reqId, + "method": "chat.send", + "params": map[string]interface{}{ + "sessionKey": sessionKey, + "message": text, + "idempotencyKey": reqId, + }, + }) + if err != nil { + return "", err + } + + timeout := time.After(120 * time.Second) + var buf strings.Builder + runRegistered := false + + for { + select { + case <-timeout: + if buf.Len() > 0 { + return buf.String(), nil + } + return "", fmt.Errorf("timeout") + + case msg := <-ch: + // Handle ack response to chat.send + if msg["type"] == "res" { + if msg["ok"] != true { + errObj, _ := msg["error"].(map[string]interface{}) + errMsg, _ := errObj["message"].(string) + return "", fmt.Errorf("gateway: %s", errMsg) + } + // Extract runId and register for chat events + if payload, ok := msg["payload"].(map[string]interface{}); ok { + if runId, _ := payload["runId"].(string); runId != "" && !runRegistered { + g.pendingMu.Lock() + g.pending[runId] = ch // reuse same channel + g.pendingMu.Unlock() + defer g.unregister(runId) + runRegistered = true + } + } + continue + } + + // Handle chat event (delta / final / error) + state, _ := msg["state"].(string) + + if state == "delta" { + for _, text := range extractText(msg) { + buf.WriteString(text) + } + } + + if state == "final" { + // If we got deltas, use them; otherwise use the final message + if buf.Len() == 0 { + for _, text := range extractText(msg) { + buf.WriteString(text) + } + } + result := strings.TrimSpace(buf.String()) + if result == "" { + return "[no response]", nil + } + return result, nil + } + + if state == "error" { + errMsg, _ := msg["errorMessage"].(string) + if buf.Len() > 0 { + return buf.String(), nil + } + return "", fmt.Errorf("agent error: %s", errMsg) + } + } + } +} + +// extractText pulls text parts from a chat event message payload. +func extractText(evt map[string]interface{}) []string { + msg, ok := evt["message"].(map[string]interface{}) + if !ok { + return nil + } + parts, ok := msg["content"].([]interface{}) + if !ok { + return nil + } + var texts []string + for _, p := range parts { + if m, ok := p.(map[string]interface{}); ok { + if t, ok := m["text"].(string); ok { + texts = append(texts, t) + } + } + } + return texts +} + +// --------------------------------------------------------------------------- +// Chat message store +// --------------------------------------------------------------------------- + +type Message struct { + ID int64 `json:"id"` + Sender string `json:"sender"` + SenderType string `json:"sender_type"` + Content string `json:"content"` + Timestamp int64 `json:"timestamp"` +} + +var ( + messages []Message + msgMu sync.RWMutex + uiClients []*websocket.Conn + uiMu sync.Mutex +) + +func addMessage(m Message) { + msgMu.Lock() + messages = append(messages, m) + if len(messages) > 200 { + messages = messages[len(messages)-200:] + } + msgMu.Unlock() + broadcastUI(m) +} + +func broadcastUI(m Message) { + data := mustJSON(m) + uiMu.Lock() + defer uiMu.Unlock() + alive := make([]*websocket.Conn, 0, len(uiClients)) + for _, c := range uiClients { + if err := c.WriteMessage(websocket.TextMessage, data); err == nil { + alive = append(alive, c) + } + } + uiClients = alive +} + +// --------------------------------------------------------------------------- +// HTTP handlers +// --------------------------------------------------------------------------- + +var wsUpgrader = websocket.Upgrader{CheckOrigin: func(r *http.Request) bool { return true }} + +func handleWS(w http.ResponseWriter, r *http.Request) { + conn, err := wsUpgrader.Upgrade(w, r, nil) + if err != nil { + return + } + + // Add to broadcast list + uiMu.Lock() + uiClients = append(uiClients, conn) + uiMu.Unlock() + + // Read messages from the UI + for { + _, data, err := conn.ReadMessage() + if err != nil { + break + } + var incoming struct { + Content string `json:"content"` + Sender string `json:"sender"` + } + json.Unmarshal(data, &incoming) + if incoming.Content == "" { + continue + } + if incoming.Sender == "" { + incoming.Sender = "Johan" + } + + userMsg := Message{ + ID: time.Now().UnixMilli(), + Sender: incoming.Sender, + SenderType: "human", + Content: incoming.Content, + Timestamp: time.Now().UnixMilli(), + } + addMessage(userMsg) + + // Get agent response + go func(text string) { + response, err := gw.Chat(text) + if err != nil { + response = fmt.Sprintf("[error: %v]", err) + } + addMessage(Message{ + ID: time.Now().UnixMilli(), + Sender: "Hans", + SenderType: "ai", + Content: response, + Timestamp: time.Now().UnixMilli(), + }) + }(incoming.Content) + } +} + +func handleAPI(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "POST only", 405) + return + } + var msg Message + json.NewDecoder(r.Body).Decode(&msg) + if msg.Content == "" { + http.Error(w, "content required", 400) + return + } + if msg.Sender == "" { + msg.Sender = "Johan" + } + msg.SenderType = "human" + msg.Timestamp = time.Now().UnixMilli() + msg.ID = msg.Timestamp + addMessage(msg) + + go func() { + response, err := gw.Chat(msg.Content) + if err != nil { + response = fmt.Sprintf("[error: %v]", err) + } + addMessage(Message{ + ID: time.Now().UnixMilli(), + Sender: "Hans", + SenderType: "ai", + Content: response, + Timestamp: time.Now().UnixMilli(), + }) + }() + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(map[string]bool{"ok": true}) +} + +func handleMessages(w http.ResponseWriter, r *http.Request) { + msgMu.RLock() + defer msgMu.RUnlock() + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(messages) +} + +func handleStatus(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(map[string]interface{}{ + "gateway": gw.connected, + "session": sessionKey, + }) +} + +// --------------------------------------------------------------------------- +// Main +// --------------------------------------------------------------------------- + +func main() { + go gw.connect() + + // Wait for gateway + for !gw.connected { + time.Sleep(200 * time.Millisecond) + } + log.Printf("Vault1984 Chat ready — http://%s", listenAddr) + + http.HandleFunc("/", serveHTML) + http.HandleFunc("/ws", handleWS) + http.HandleFunc("/api/send", handleAPI) + http.HandleFunc("/api/messages", handleMessages) + http.HandleFunc("/api/status", handleStatus) + log.Fatal(http.ListenAndServe(listenAddr, nil)) +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +func mustJSON(v interface{}) []byte { + b, _ := json.Marshal(v) + return b +} + +// --------------------------------------------------------------------------- +// Embedded UI +// --------------------------------------------------------------------------- + +func serveHTML(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html") + w.Write([]byte(` + +Vault1984 Chat + +
+

🔒 VAULT1984 // CHAT

+
+
+ +
`)) +} diff --git a/chat.go b/chat.go new file mode 100644 index 0000000..b6d73dc --- /dev/null +++ b/chat.go @@ -0,0 +1,361 @@ +package main + +import ( + "encoding/json" + "flag" + "fmt" + "log" + "net/http" + "os/exec" + "strings" + "sync" + "time" + + "github.com/gorilla/websocket" +) + +var upgrader = websocket.Upgrader{ + ReadBufferSize: 1024, + WriteBufferSize: 1024, + CheckOrigin: func(r *http.Request) bool { + return true // Allow all origins for now + }, +} + +type Message struct { + ID int64 `json:"id"` + Sender string `json:"sender"` + SenderType string `json:"sender_type"` // "ai", "human", "system" + Content string `json:"content"` + Timestamp int64 `json:"timestamp"` +} + +type Client struct { + conn *websocket.Conn + sender string + senderType string +} + +var ( + clients = make(map[*Client]bool) + clientsMu sync.RWMutex + messages []Message + messagesMu sync.RWMutex + maxMessages = 1000 +) + +const ( + port = 1985 + tailscaleIP = "100.85.192.60" // Only accessible via Tailscale + openClawURL = "http://127.0.0.1:18789/tools/invoke" + openClawToken = "601267edaccf8cd3d6afe222c3ce63602e210ff1ecc9a268" +) + +func main() { + flag.Parse() + + // HTTP endpoints + http.HandleFunc("/", handleHTML) + http.HandleFunc("/chat.js", handleJS) + http.HandleFunc("/api/send", handleSend) + http.HandleFunc("/api/messages", handleMessages) + http.HandleFunc("/api/status", handleStatus) + + // WebSocket + http.HandleFunc("/ws", handleWS) + + addr := fmt.Sprintf("100.85.192.60:%d", port) // Bind to Tailscale IP only + fmt.Printf("Vault1984 Chat v0.2 (Go+WS) running on http://localhost:%d\n", port) + log.Fatal(http.ListenAndServe(addr, nil)) +} + +func handleHTML(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html") + w.Write([]byte(htmlPage)) +} + +func handleJS(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/javascript") + w.Write([]byte(jsPage)) +} + +func handleSend(w http.ResponseWriter, r *http.Request) { + if r.Method != "POST" { + http.Error(w, "Method not allowed", 405) + return + } + + var msg Message + if err := json.NewDecoder(r.Body).Decode(&msg); err != nil { + http.Error(w, "Invalid JSON", 400) + return + } + + if msg.Sender == "" || msg.Content == "" { + http.Error(w, "Missing sender or content", 400) + return + } + + if msg.SenderType == "" { + msg.SenderType = "ai" + } + + msg.Timestamp = time.Now().UnixMilli() + msg.ID = msg.Timestamp + + messagesMu.Lock() + messages = append([]Message{msg}, messages...) + if len(messages) > maxMessages { + messages = messages[:maxMessages] + } + messagesMu.Unlock() + + // Broadcast to all connected WS clients + broadcast(msg) + + // If it's a human message, respond via OpenClaw agent + if msg.SenderType == "human" { + log.Printf("Calling OpenClaw for message from %s: %s", msg.Sender, msg.Content) + + // Call OpenClaw to get real response (synchronous - wait for it) + response := callOpenClaw(msg.Content, msg.Sender) + + // Add the response to messages + responseMsg := Message{ + ID: time.Now().UnixMilli(), + Sender: "Hans", + SenderType: "ai", + Content: response, + Timestamp: time.Now().UnixMilli(), + } + messagesMu.Lock() + messages = append([]Message{responseMsg}, messages...) + if len(messages) > maxMessages { + messages = messages[:maxMessages] + } + messagesMu.Unlock() + broadcast(responseMsg) + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(map[string]interface{}{ + "success": true, + "message": msg, + }) +} + +func handleMessages(w http.ResponseWriter, r *http.Request) { + since := 0 + if s := r.URL.Query().Get("since"); s != "" { + fmt.Sscanf(s, "%d", &since) + } + + messagesMu.RLock() + var filtered []Message + for _, m := range messages { + if int64(since) < m.Timestamp { + filtered = append(filtered, m) + } + } + messagesMu.RUnlock() + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(filtered) +} + +func handleStatus(w http.ResponseWriter, r *http.Request) { + clientsMu.RLock() + defer clientsMu.RUnlock() + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(map[string]interface{}{ + "status": "online", + "messages_count": len(messages), + "connected_count": len(clients), + "uptime": "N/A", // Could track startup time + }) +} + +func handleWS(w http.ResponseWriter, r *http.Request) { + conn, err := upgrader.Upgrade(w, r, nil) + if err != nil { + log.Println("WS upgrade error:", err) + return + } + + client := &Client{conn: conn, sender: "unknown", senderType: "human"} + clientsMu.Lock() + clients[client] = true + clientsMu.Unlock() + + // Send recent messages to new client (last 20) + messagesMu.RLock() + recent := messages + if len(recent) > 20 { + recent = recent[:20] + } + messagesMu.RUnlock() + + for _, m := range recent { + client.sendJSON(map[string]interface{}{ + "type": "history", + "messages": []Message{m}, + }) + } + + // Handle incoming messages + for { + _, data, err := conn.ReadMessage() + if err != nil { + break + } + + var msg map[string]interface{} + if err := json.Unmarshal(data, &msg); err != nil { + continue + } + + if msg["type"] == "register" { + if s, ok := msg["sender"].(string); ok { + client.sender = s + } + if st, ok := msg["sender_type"].(string); ok { + client.senderType = st + } + client.sendJSON(map[string]string{"type": "registered", "sender": client.sender}) + } + + if msg["type"] == "ping" { + client.sendJSON(map[string]string{"type": "pong"}) + } + + // Handle message sent via WS + log.Printf("WS message received: %v", msg) + if content, ok := msg["content"].(string); ok && content != "" { + sender := client.sender + senderType := client.senderType + if s, ok := msg["sender"].(string); ok && s != "" { + sender = s + } + if st, ok := msg["sender_type"].(string); ok && st != "" { + senderType = st + } + + log.Printf("Saving message from %s: %s", sender, content) + + // Save and broadcast the user's message + msgObj := Message{ + ID: time.Now().UnixMilli(), + Sender: sender, + SenderType: senderType, + Content: content, + Timestamp: time.Now().UnixMilli(), + } + + messagesMu.Lock() + messages = append([]Message{msgObj}, messages...) + if len(messages) > maxMessages { + messages = messages[:maxMessages] + } + messagesMu.Unlock() + + broadcast(msgObj) + + // If it's a human message, call OpenClaw and broadcast response + if senderType == "human" { + go func() { + response := callOpenClaw(content, sender) + responseMsg := Message{ + ID: time.Now().UnixMilli(), + Sender: "Hans", + SenderType: "ai", + Content: response, + Timestamp: time.Now().UnixMilli(), + } + messagesMu.Lock() + messages = append([]Message{responseMsg}, messages...) + if len(messages) > maxMessages { + messages = messages[:maxMessages] + } + messagesMu.Unlock() + broadcast(responseMsg) + }() + } + } + } + + clientsMu.Lock() + delete(clients, client) + clientsMu.Unlock() + conn.Close() +} + +func (c *Client) sendJSON(v interface{}) { + if err := c.conn.WriteJSON(v); err != nil { + log.Println("WS send error:", err) + } +} + +func broadcast(msg Message) { + clientsMu.RLock() + defer clientsMu.RUnlock() + + for client := range clients { + client.sendJSON(map[string]interface{}{ + "type": "message", + "id": msg.ID, + "sender": msg.Sender, + "sender_type": msg.SenderType, + "content": msg.Content, + "timestamp": msg.Timestamp, + }) + } +} + +// callOpenClaw sends a message to OpenClaw and returns a real response +func callOpenClaw(content, sender string) string { + // Use local agent - will load model (~10s) but works + // This is the fastest we can get with CLI without Gateway integration + cmd := exec.Command("openclaw", "agent", + "--message", content, + "--session-id", fmt.Sprintf("chat-%d", time.Now().UnixMilli()), + "--local", + "--timeout", "15") + + output, err := cmd.Output() + + if err != nil { + log.Printf("OpenClaw agent error: %v, output: %s", err, string(output)) + return "📩 Received: " + content + } + + // Extract the response from the output + response := strings.TrimSpace(string(output)) + + // Clean up the response - take the first meaningful line(s) + lines := strings.Split(response, "\n") + var cleanResponse string + for _, line := range lines { + line = strings.TrimSpace(line) + if line != "" && !strings.HasPrefix(line, "{") && !strings.HasPrefix(line, "[") { + cleanResponse = line + break + } + } + + if cleanResponse == "" { + cleanResponse = "📩 Received: " + content + } + + // Truncate if too long + if len(cleanResponse) > 500 { + cleanResponse = cleanResponse[:500] + "..." + } + + log.Printf("Agent response: %s", cleanResponse) + return cleanResponse +} + +const htmlPage = "\n\n\n \n \n Vault1984 — AI Chat\n \n\n\n
\n
\n

🔒 VAULT1984 // CHAT v0.3

\n
● CONNECTED
\n
\n
\n
\n \n \n
\n
\n \n\n" + +const jsPage = "let ws;\nlet reconnectAttempts = 0;\nconst maxReconnectAttempts = 10;\nconst reconnectDelay = 1000;\n\nfunction getSenderClass(sender, senderType) {\n if (!sender) return 'system';\n const s = sender.toLowerCase();\n if (s === 'johan') return 'johan';\n if (s === 'hans') return 'hans';\n if (s === 'james') return 'james';\n if (senderType === 'human') return 'human';\n return 'ai';\n}\n\nfunction connect() {\n ws = new WebSocket('ws://' + location.host + '/ws');\n \n ws.onopen = () => {\n document.getElementById('status').className = 'status online';\n document.getElementById('status').textContent = '● CONNECTED';\n reconnectAttempts = 0;\n ws.send(JSON.stringify({ type: 'register', sender: 'Johan', sender_type: 'human' }));\n };\n \n ws.onmessage = (e) => {\n const data = JSON.parse(e.data);\n if (data.type === 'history' && data.messages) {\n // Sort by timestamp (oldest first)\n data.messages.sort((a, b) => a.timestamp - b.timestamp);\n data.messages.forEach(addMessage);\n } else if (data.type === 'message') {\n addMessage(data);\n }\n };\n \n ws.onclose = () => {\n document.getElementById('status').className = 'status';\n document.getElementById('status').textContent = '○ DISCONNECTED';\n if (reconnectAttempts < maxReconnectAttempts) {\n reconnectAttempts++;\n setTimeout(connect, reconnectDelay * reconnectAttempts);\n }\n };\n}\n\nfunction addMessage(msg) {\n const div = document.createElement('div');\n const senderClass = getSenderClass(msg.sender, msg.sender_type);\n div.className = 'message ' + senderClass;\n \n let content = msg.content || '';\n content = content.replace(/`([^`]+)`/g, '$1');\n content = content.replace(/\\*\\*([^*]+)\\*\\*/g, '$1');\n \n div.innerHTML = '
' + (msg.sender || 'System') + '
' + content + '
' + new Date(msg.timestamp).toLocaleString() + '
';\n document.getElementById('chat').appendChild(div);\n document.getElementById('chat').scrollTop = document.getElementById('chat').scrollHeight;\n}\n\nfunction send() {\n const input = document.getElementById('msg');\n const content = input.value.trim();\n if (!content || !ws || ws.readyState !== 1) return;\n ws.send(JSON.stringify({ sender: 'Johan', sender_type: 'human', content: content }));\n input.value = '';\n}\n\ndocument.getElementById('msg').addEventListener('keypress', (e) => { if (e.key === 'Enter') send(); });\nsetInterval(() => { if (ws && ws.readyState === 1) ws.send(JSON.stringify({ type: 'ping' })); }, 30000);\nconnect();" \ No newline at end of file diff --git a/dashboard.go b/dashboard.go new file mode 100644 index 0000000..2cb1f93 --- /dev/null +++ b/dashboard.go @@ -0,0 +1,1680 @@ +package main + +import ( + "bytes" + "database/sql" + "encoding/json" + "fmt" + "html/template" + "io" + "log" + "net/http" + "os" + "sort" + "strings" + "time" + + _ "modernc.org/sqlite" +) + +const ( + fireworksKey = "fw_RVcDe4c6mN4utKLsgA7hTm" + fireworksModel = "accounts/fireworks/models/gpt-oss-20b" + fireworksURL = "https://api.fireworks.ai/inference/v1/chat/completions" +) + +var systemPrompt = `You are the Vault1984 NOC assistant. Vault1984 is a global network of security POPs (Points of Presence) across 21 regions. + +Your job is to: +- Help users report network problems or outages +- Answer questions about Vault1984 infrastructure status +- Collect details about issues (region, symptoms, timestamps) +- Be concise and professional + +If someone reports a problem, acknowledge it, collect: affected region, what they observed, and when it started. Keep responses short.` + +const version = "v0.4" + +const ( + colorGreen = "#76AD2A" + colorOrange = "#E86235" + colorRed = "#E04343" + colorGray = "#D8D6D0" +) + +// Schema +const schema = ` +CREATE TABLE IF NOT EXISTS nodes ( + id TEXT PRIMARY KEY, + name TEXT NOT NULL, + region TEXT NOT NULL, + ip TEXT NOT NULL DEFAULT '', + status TEXT NOT NULL DEFAULT 'planned' +); + +CREATE TABLE IF NOT EXISTS uptime ( + node_id TEXT NOT NULL, + date TEXT NOT NULL, -- YYYY-MM-DD + status TEXT NOT NULL, -- operational | degraded | outage | planned + PRIMARY KEY (node_id, date) +); + +CREATE TABLE IF NOT EXISTS incidents ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + title TEXT NOT NULL, + status TEXT NOT NULL, -- resolved | monitoring | investigating + date TEXT NOT NULL, -- display date e.g. "Mar 4, 2026" + node_ids TEXT NOT NULL DEFAULT '', + created_at INTEGER NOT NULL DEFAULT (strftime('%s','now')) +); + +CREATE TABLE IF NOT EXISTS incident_updates ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + incident_id INTEGER NOT NULL REFERENCES incidents(id), + ts TEXT NOT NULL, -- ISO8601 e.g. "2026-03-04 16:08 UTC" + status TEXT NOT NULL, -- investigating | identified | monitoring | resolved + body TEXT NOT NULL +); + +CREATE TABLE IF NOT EXISTS telemetry ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + node_id TEXT NOT NULL, + received_at INTEGER NOT NULL DEFAULT (strftime('%s','now')), + version TEXT NOT NULL DEFAULT '', + hostname TEXT NOT NULL DEFAULT '', + uptime_seconds INTEGER NOT NULL DEFAULT 0, + cpu_percent REAL NOT NULL DEFAULT 0, + memory_total_mb INTEGER NOT NULL DEFAULT 0, + memory_used_mb INTEGER NOT NULL DEFAULT 0, + disk_total_mb INTEGER NOT NULL DEFAULT 0, + disk_used_mb INTEGER NOT NULL DEFAULT 0, + load_1m REAL NOT NULL DEFAULT 0, + vault_count INTEGER NOT NULL DEFAULT 0, + vault_size_mb REAL NOT NULL DEFAULT 0, + vault_entries INTEGER NOT NULL DEFAULT 0, + mode TEXT NOT NULL DEFAULT '' +); +` + +type Node struct { + ID string + Name string + Region string + IP string + Status string + History []DayStatus +} + +type DayStatus struct { + Date string + Status string +} + +type IncidentUpdate struct { + TS string + Status string + Body string +} + +type Incident struct { + ID int64 + Title string + Status string + Date string + NodeIDs string + Updates []IncidentUpdate +} + +type PageData struct { + UpdatedAt string + OverallOK bool + Nodes []*Node + Incidents []Incident +} + +func (n *Node) UptimePct() string { + if n.Status == "planned" { + return "—" + } + total, ok := 0, 0 + for _, h := range n.History { + if h.Status == "planned" || h.Status == "nodata" { + continue + } + total++ + if h.Status == "operational" { + ok++ + } + } + if total == 0 { + return "—" + } + return fmt.Sprintf("%.2f%%", float64(ok)/float64(total)*100) +} + +func (n *Node) SVGBars() template.HTML { + var sb strings.Builder + sb.WriteString(``) + for i, day := range n.History { + x := i * 5 + color := colorGray + switch day.Status { + case "operational": + color = colorGreen + case "degraded": + color = colorOrange + case "outage": + color = colorRed + case "nodata": + color = "none" + } + title := fmt.Sprintf("%s: %s", day.Date, day.Status) + sb.WriteString(fmt.Sprintf( + `%s`, + x, color, title, + )) + } + sb.WriteString(``) + return template.HTML(sb.String()) +} + +func (n *Node) StatusLabel() string { + switch n.Status { + case "operational": + return "Operational" + case "degraded": + return "Degraded Performance" + case "outage": + return "Major Outage" + case "planned": + return "Planned" + } + return "Unknown" +} + +func (n *Node) StatusColor() string { + switch n.Status { + case "operational": + return colorGreen + case "degraded": + return colorOrange + case "outage": + return colorRed + } + return colorGray +} + +func nodeOrder(n *Node) int { + if n.ID == "hq-zurich" { + return 0 + } + if n.Status == "operational" { + return 1 + } + return 2 +} + +var db *sql.DB + +// seedData populates the DB on first run +func seedData() { + // Insert nodes + nodeList := []struct{ id, name, region, ip, status string }{ + {"virginia", "Virginia", "us-east-1", "18.209.55.127", "operational"}, + {"singapore", "Singapore", "ap-southeast-1", "47.129.4.217", "operational"}, + {"zurich", "Zürich", "eu-central-2", "16.18.20.81", "operational"}, + {"ncalifornia", "N. California", "us-west-1", "", "planned"}, + {"montreal", "Montreal", "ca-central-1", "", "planned"}, + {"mexicocity", "Mexico City", "mx-central-1", "", "planned"}, + {"saopaulo", "São Paulo", "sa-east-1", "", "planned"}, + {"london", "London", "eu-west-2", "", "planned"}, + {"paris", "Paris", "eu-west-3", "", "planned"}, + {"spain", "Spain", "eu-south-2", "", "planned"}, + {"stockholm", "Stockholm", "eu-north-1", "", "planned"}, + {"uae", "UAE", "me-central-1", "", "planned"}, + {"telaviv", "Tel Aviv", "il-central-1", "", "planned"}, + {"capetown", "Cape Town", "af-south-1", "", "planned"}, + {"mumbai", "Mumbai", "ap-south-1", "", "planned"}, + {"jakarta", "Jakarta", "ap-southeast-3", "", "planned"}, + {"malaysia", "Malaysia", "ap-southeast-5", "", "planned"}, + {"sydney", "Sydney", "ap-southeast-2", "", "planned"}, + {"seoul", "Seoul", "ap-northeast-2", "", "planned"}, + {"hongkong", "Hong Kong", "ap-east-1", "", "planned"}, + {"tokyo", "Tokyo", "ap-northeast-1", "", "planned"}, + } + for _, n := range nodeList { + db.Exec(`INSERT OR IGNORE INTO nodes(id,name,region,ip,status) VALUES(?,?,?,?,?)`, + n.id, n.name, n.region, n.ip, n.status) + } + + // Planned nodes: seed gray bars so the UI renders correctly + var plannedIDs []string + rows, _ := db.Query(`SELECT id FROM nodes WHERE status='planned'`) + for rows.Next() { + var id string + rows.Scan(&id) + plannedIDs = append(plannedIDs, id) + } + rows.Close() + now := time.Now() + for _, nodeID := range plannedIDs { + for i := 89; i >= 0; i-- { + date := now.AddDate(0, 0, -i).Format("2006-01-02") + db.Exec(`INSERT OR IGNORE INTO uptime(node_id,date,status) VALUES(?,?,?)`, nodeID, date, "planned") + } + } + +} + +func loadNodes() []*Node { + rows, err := db.Query(`SELECT id,name,region,ip,status FROM nodes`) + if err != nil { + log.Printf("loadNodes error: %v", err) + return nil + } + defer rows.Close() + + var nodes []*Node + for rows.Next() { + n := &Node{} + rows.Scan(&n.ID, &n.Name, &n.Region, &n.IP, &n.Status) + nodes = append(nodes, n) + } + + // Load 90-day history for each node + now := time.Now() + for _, n := range nodes { + n.History = make([]DayStatus, 90) + // Build date map from DB + histMap := map[string]string{} + hrows, _ := db.Query(`SELECT date,status FROM uptime WHERE node_id=? ORDER BY date`, n.ID) + for hrows.Next() { + var date, status string + hrows.Scan(&date, &status) + histMap[date] = status + } + hrows.Close() + + for i := 0; i < 90; i++ { + date := now.AddDate(0, 0, -(89 - i)).Format("2006-01-02") + status, ok := histMap[date] + if !ok { + if n.Status == "planned" { + status = "planned" + } else { + status = "nodata" // live node, no record yet — render transparent + } + } + n.History[i] = DayStatus{Date: date, Status: status} + } + } + + sort.SliceStable(nodes, func(i, j int) bool { + oi, oj := nodeOrder(nodes[i]), nodeOrder(nodes[j]) + if oi != oj { + return oi < oj + } + return nodes[i].Name < nodes[j].Name + }) + return nodes +} + +func loadIncidents() []Incident { + rows, err := db.Query(`SELECT id,title,status,date,node_ids FROM incidents ORDER BY created_at DESC`) + if err != nil { + return nil + } + defer rows.Close() + var list []Incident + for rows.Next() { + var inc Incident + rows.Scan(&inc.ID, &inc.Title, &inc.Status, &inc.Date, &inc.NodeIDs) + list = append(list, inc) + } + rows.Close() + + // Load updates for each incident + for i := range list { + urows, err := db.Query( + `SELECT ts,status,body FROM incident_updates WHERE incident_id=? ORDER BY id ASC`, + list[i].ID, + ) + if err != nil { + continue + } + for urows.Next() { + var u IncidentUpdate + urows.Scan(&u.TS, &u.Status, &u.Body) + list[i].Updates = append(list[i].Updates, u) + } + urows.Close() + } + return list +} + +func buildPage() PageData { + nodes := loadNodes() + ok := true + for _, n := range nodes { + if n.Status == "outage" || n.Status == "degraded" { + ok = false + break + } + } + return PageData{ + UpdatedAt: time.Now().UTC().Format("Jan 2, 2006 15:04 UTC"), + OverallOK: ok, + Nodes: nodes, + Incidents: loadIncidents(), + } +} + +var tpl = template.Must(template.New("page").Parse(` + + + + + Vault1984 Status + + + +
+ + {{if .OverallOK}} + + {{else}} + + {{end}} +
Uptime over the past 90 days.
+
Regional Points of Presence
+ {{range .Nodes}} +
+
+ + {{.Name}} + {{.Region}} + + {{.StatusLabel}} +
+
{{.SVGBars}}
+
+ 90 days ago + {{.UptimePct}} uptime + Today +
+
+ {{end}} + {{if .Incidents}} +
+
Past Incidents
+ {{range .Incidents}} +
+
+ {{.Title}} + {{.Date}} +
+ {{.Status}} + {{if .NodeIDs}}

Affected: {{.NodeIDs}}

{{end}} + {{if .Updates}} +
+ {{range .Updates}} +
+
+
{{.TS}}
+
{{.Status}}
+
{{.Body}}
+
+ {{end}} +
+ {{end}} +
+ {{end}} +
+ {{end}} +
vault1984.com — Global Network Operations Center
+
+ + + + +
+
+ 🛡 Vault1984 Support + +
+
+
Hi! Report a network issue or ask about our infrastructure. Which region are you seeing problems with?
+
+
+ + +
+
+ + + + +`)) + +// tailscaleOnly allows only requests from Tailscale (100.64.0.0/10) or localhost +func tailscaleOnly(next http.HandlerFunc) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + ip := r.RemoteAddr + if i := strings.LastIndex(ip, ":"); i >= 0 { + ip = ip[:i] + } + ip = strings.Trim(ip, "[]") + allowed := ip == "127.0.0.1" || ip == "::1" || strings.HasPrefix(ip, "100.") + if !allowed { + http.Error(w, "Forbidden", 403) + return + } + next(w, r) + } +} + +func handleOpsDashboard(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html; charset=utf-8") + fmt.Fprint(w, opsDashboardHTML) +} + +const opsDashboardHTML = ` + + + + +vault1984 NOC + + + +
+

⚡ vault1984 NOC

+
+ + loading... +
+
+
+
+
+
+
+
+
+
+
+ + + +` + +func handleAdminGUI(w http.ResponseWriter, r *http.Request) { + nodes := loadNodes() + incidents := loadIncidents() + w.Header().Set("Content-Type", "text/html; charset=utf-8") + adminTpl.Execute(w, map[string]interface{}{ + "Nodes": nodes, + "Incidents": incidents, + "Now": time.Now().UTC().Format("Jan 2, 2006 15:04 UTC"), + }) +} + +func handleAdminNewIncident(w http.ResponseWriter, r *http.Request) { + if r.Method != "POST" { w.WriteHeader(405); return } + var p struct { + Title string `json:"title"` + Status string `json:"status"` + Body string `json:"body"` + NodeIDs string `json:"node_ids"` + } + decode(r, &p) + date := time.Now().UTC().Format("Jan 2, 2006") + res, err := db.Exec(`INSERT INTO incidents(title,status,date,node_ids) VALUES(?,?,?,?)`, + p.Title, p.Status, date, p.NodeIDs) + if err != nil { jsonErr(w, err); return } + id, _ := res.LastInsertId() + ts := time.Now().UTC().Format("Jan 2, 2006 15:04 UTC") + db.Exec(`INSERT INTO incident_updates(incident_id,ts,status,body) VALUES(?,?,?,?)`, + id, ts, p.Status, p.Body) + // Update affected nodes + if p.NodeIDs != "" { + for _, nid := range strings.Split(p.NodeIDs, ",") { + nid = strings.TrimSpace(nid) + if nid != "" { + db.Exec(`UPDATE nodes SET status=? WHERE id=?`, p.Status, nid) + } + } + } + log.Printf("admin: new incident #%d: %s", id, p.Title) + jsonOK(w) +} + +func handleAdminUpdateIncident(w http.ResponseWriter, r *http.Request) { + if r.Method != "POST" { w.WriteHeader(405); return } + var p struct { + IncidentID int64 `json:"incident_id"` + Status string `json:"status"` + Body string `json:"body"` + } + decode(r, &p) + ts := time.Now().UTC().Format("Jan 2, 2006 15:04 UTC") + db.Exec(`INSERT INTO incident_updates(incident_id,ts,status,body) VALUES(?,?,?,?)`, + p.IncidentID, ts, p.Status, p.Body) + db.Exec(`UPDATE incidents SET status=? WHERE id=?`, p.Status, p.IncidentID) + log.Printf("admin: updated incident #%d → %s", p.IncidentID, p.Status) + jsonOK(w) +} + +func handleAdminNodeStatus(w http.ResponseWriter, r *http.Request) { + if r.Method != "POST" { w.WriteHeader(405); return } + var p struct { + ID string `json:"id"` + Status string `json:"status"` + } + decode(r, &p) + today := time.Now().Format("2006-01-02") + db.Exec(`UPDATE nodes SET status=? WHERE id=?`, p.Status, p.ID) + db.Exec(`INSERT OR REPLACE INTO uptime(node_id,date,status) VALUES(?,?,?)`, p.ID, today, p.Status) + log.Printf("admin: node %s → %s", p.ID, p.Status) + jsonOK(w) +} + +func decode(r *http.Request, v interface{}) { + ct := r.Header.Get("Content-Type") + if strings.Contains(ct, "application/json") { + json.NewDecoder(r.Body).Decode(v) + } else { + r.ParseForm() + // map form values into struct via JSON round-trip + m := map[string]string{} + for k, vs := range r.Form { if len(vs) > 0 { m[k] = vs[0] } } + b, _ := json.Marshal(m) + json.Unmarshal(b, v) + } +} +func jsonOK(w http.ResponseWriter) { + w.Header().Set("Content-Type", "application/json") + fmt.Fprintln(w, `{"ok":true}`) +} +func jsonErr(w http.ResponseWriter, err error) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(500) + json.NewEncoder(w).Encode(map[string]string{"error": err.Error()}) +} + +var adminTpl = template.Must(template.New("admin").Parse(` + + + + + Vault1984 Admin + + + +
+

🛡 Vault1984 Admin

+ + + +

Node Status

+
+ {{range .Nodes}} +
+ {{.Name}}
+ {{.Region}} + + +
+ {{end}} +
+ + +

Create Incident

+
+ + + + + + + + + +
Incident created ✓
+
+ + +

Post Update to Incident

+
+ {{if .Incidents}} + + + + + + + +
Update posted ✓
+ {{else}} +

No incidents yet.

+ {{end}} +
+
+ + + + +`)) + +func handleChat(w http.ResponseWriter, r *http.Request) { + if r.Method != "POST" { + w.WriteHeader(405) + return + } + var req struct { + Messages []map[string]string `json:"messages"` + } + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + w.WriteHeader(400) + return + } + + // Prepend system message + msgs := append([]map[string]string{{"role": "system", "content": systemPrompt}}, req.Messages...) + + payload, _ := json.Marshal(map[string]interface{}{ + "model": fireworksModel, + "messages": msgs, + "max_tokens": 300, + "temperature": 0.7, + }) + + fwReq, _ := http.NewRequest("POST", fireworksURL, bytes.NewReader(payload)) + fwReq.Header.Set("Authorization", "Bearer "+fireworksKey) + fwReq.Header.Set("Content-Type", "application/json") + + resp, err := http.DefaultClient.Do(fwReq) + if err != nil { + log.Printf("fireworks error: %v", err) + http.Error(w, `{"error":"upstream error"}`, 502) + return + } + defer resp.Body.Close() + body, _ := io.ReadAll(resp.Body) + + var fw struct { + Choices []struct { + Message struct { + Content string `json:"content"` + } `json:"message"` + } `json:"choices"` + } + if err := json.Unmarshal(body, &fw); err != nil || len(fw.Choices) == 0 { + log.Printf("fireworks parse error: %s", body) + http.Error(w, `{"error":"parse error"}`, 500) + return + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(map[string]string{ + "reply": strings.TrimSpace(fw.Choices[0].Message.Content), + }) +} + +// OTLP JSON metric receiver +func handleOTLP(w http.ResponseWriter, r *http.Request) { + if r.Method != "POST" { + w.WriteHeader(405) + return + } + + var payload struct { + ResourceMetrics []struct { + Resource struct { + Attributes []struct { + Key string `json:"key"` + Value struct { + StringValue string `json:"stringValue"` + } `json:"value"` + } `json:"attributes"` + } `json:"resource"` + ScopeMetrics []struct { + Metrics []struct { + Name string `json:"name"` + Gauge struct { + DataPoints []struct { + AsDouble float64 `json:"asDouble"` + } `json:"dataPoints"` + } `json:"gauge"` + } `json:"metrics"` + } `json:"scopeMetrics"` + } `json:"resourceMetrics"` + } + + if err := json.NewDecoder(r.Body).Decode(&payload); err != nil { + w.WriteHeader(400) + log.Printf("OTLP decode error: %v", err) + return + } + + for _, rm := range payload.ResourceMetrics { + // Extract node.id from resource attributes + nodeID := "" + for _, a := range rm.Resource.Attributes { + if a.Key == "node.id" { + nodeID = a.Value.StringValue + } + } + if nodeID == "" { + continue + } + + // Extract metrics + metrics := map[string]float64{} + for _, sm := range rm.ScopeMetrics { + for _, m := range sm.Metrics { + if len(m.Gauge.DataPoints) > 0 { + metrics[m.Name] = m.Gauge.DataPoints[0].AsDouble + } + } + } + + cpu := metrics["system.cpu.utilization"] + mem := metrics["system.memory.utilization"] + disk := metrics["system.disk.utilization"] + + // Determine status from thresholds + status := "operational" + if cpu > 0.9 || mem > 0.9 || disk > 0.9 { + status = "degraded" + } + + // Update node status + today's uptime + today := time.Now().Format("2006-01-02") + db.Exec(`UPDATE nodes SET status=? WHERE id=?`, status, nodeID) + db.Exec(`INSERT OR REPLACE INTO uptime(node_id,date,status) VALUES(?,?,?)`, nodeID, today, status) + + // Store latest metrics as JSON in a simple kv table (lazy: reuse uptime body col if available, or just log) + log.Printf("OTLP [%s] cpu=%.1f%% mem=%.1f%% disk=%.1f%% → %s", + nodeID, cpu*100, mem*100, disk*100, status) + } + + w.Header().Set("Content-Type", "application/json") + fmt.Fprintln(w, `{"partialSuccess":{}}`) +} + +func main() { + fmt.Printf("=== Vault1984 Dashboard %s ===\n", version) + + + var err error + db, err = sql.Open("sqlite", "./status.db") + if err != nil { + log.Fatalf("open db: %v", err) + } + db.SetMaxOpenConns(1) // SQLite: single writer + + if _, err := db.Exec(schema); err != nil { + log.Fatalf("schema: %v", err) + } + seedData() + log.Println("DB ready: status.db") + + http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html; charset=utf-8") + tpl.Execute(w, buildPage()) + }) + + http.HandleFunc("/api/status", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + nodes := loadNodes() + ok := true + for _, n := range nodes { + if n.Status == "outage" || n.Status == "degraded" { + ok = false + break + } + } + status := "operational" + if !ok { + status = "degraded" + } + json.NewEncoder(w).Encode(map[string]interface{}{ + "status": status, + "updated": time.Now().UTC().Format(time.RFC3339), + "nodes": len(nodes), + }) + }) + + http.HandleFunc("/api/chat", handleChat) + http.HandleFunc("/v1/metrics", handleOTLP) + http.HandleFunc("/download/agent", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Disposition", "attachment; filename=v1984-agent") + w.Header().Set("Content-Type", "application/octet-stream") + http.ServeFile(w, r, "./v1984-agent") + }) + http.HandleFunc("/download/agent-arm64", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Disposition", "attachment; filename=v1984-agent") + w.Header().Set("Content-Type", "application/octet-stream") + http.ServeFile(w, r, "./v1984-agent-arm64") + }) + http.HandleFunc("/download/vault1984", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Disposition", "attachment; filename=vault1984") + w.Header().Set("Content-Type", "application/octet-stream") + http.ServeFile(w, r, "./vault1984") + }) + http.HandleFunc("/download/vault1984-arm64", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Disposition", "attachment; filename=vault1984") + w.Header().Set("Content-Type", "application/octet-stream") + http.ServeFile(w, r, "./vault1984-arm64") + }) + + http.HandleFunc("/api/nodes", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(map[string]interface{}{ + "nodes": loadNodes(), + "incidents": loadIncidents(), + }) + }) + + // Admin routes — Tailscale only + http.HandleFunc("/admin", tailscaleOnly(handleAdminGUI)) + http.HandleFunc("/admin/incident/new", tailscaleOnly(handleAdminNewIncident)) + http.HandleFunc("/admin/incident/update", tailscaleOnly(handleAdminUpdateIncident)) + http.HandleFunc("/admin/node/status", tailscaleOnly(handleAdminNodeStatus)) + + // vault1984 binary telemetry ingestion + http.HandleFunc("/telemetry", func(w http.ResponseWriter, r *http.Request) { + if r.Method != "POST" { + w.WriteHeader(405) + return + } + // Optional token auth: if TELEMETRY_TOKEN env is set, enforce it + if tok := os.Getenv("TELEMETRY_TOKEN"); tok != "" { + if r.Header.Get("X-Telemetry-Token") != tok && r.URL.Query().Get("token") != tok { + w.WriteHeader(401) + fmt.Fprintln(w, `{"error":"unauthorized"}`) + return + } + } + var p struct { + Version string `json:"version"` + Hostname string `json:"hostname"` + Uptime int64 `json:"uptime_seconds"` + System struct { + CPUPercent float64 `json:"cpu_percent"` + MemTotalMB int64 `json:"memory_total_mb"` + MemUsedMB int64 `json:"memory_used_mb"` + DiskTotalMB int64 `json:"disk_total_mb"` + DiskUsedMB int64 `json:"disk_used_mb"` + Load1m float64 `json:"load_1m"` + } `json:"system"` + Vaults struct { + Count int `json:"count"` + TotalSizeMB float64 `json:"total_size_mb"` + TotalEntries int `json:"total_entries"` + } `json:"vaults"` + Mode string `json:"mode"` + } + if err := json.NewDecoder(r.Body).Decode(&p); err != nil { + w.WriteHeader(400) + fmt.Fprintln(w, `{"error":"bad request"}`) + return + } + // Map hostname → node_id + // Priority: hostname matches node id → hostname matches node name → source IP → hostname as-is + nodeID := p.Hostname + var knownID string + // Try direct id match + db.QueryRow(`SELECT id FROM nodes WHERE id=?`, nodeID).Scan(&knownID) + if knownID == "" { + // Try matching by hostname against node name (case-insensitive) + db.QueryRow(`SELECT id FROM nodes WHERE lower(name)=lower(?) OR lower(id)=lower(?)`, nodeID, nodeID).Scan(&knownID) + } + if knownID == "" { + // Try matching by source IP (strips port) + remoteIP := r.RemoteAddr + if host, _, ok := strings.Cut(remoteIP, ":"); ok { + remoteIP = host + } + db.QueryRow(`SELECT id FROM nodes WHERE ip=?`, remoteIP).Scan(&knownID) + } + if knownID != "" { + nodeID = knownID + } + db.Exec(`INSERT INTO telemetry(node_id,version,hostname,uptime_seconds,cpu_percent,memory_total_mb,memory_used_mb,disk_total_mb,disk_used_mb,load_1m,vault_count,vault_size_mb,vault_entries,mode) + VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?)`, + nodeID, p.Version, p.Hostname, p.Uptime, + p.System.CPUPercent, p.System.MemTotalMB, p.System.MemUsedMB, + p.System.DiskTotalMB, p.System.DiskUsedMB, p.System.Load1m, + p.Vaults.Count, p.Vaults.TotalSizeMB, p.Vaults.TotalEntries, + p.Mode, + ) + // Keep last 1000 rows per node (scoped to node_id) + db.Exec(`DELETE FROM telemetry WHERE node_id=? AND id NOT IN (SELECT id FROM telemetry WHERE node_id=? ORDER BY id DESC LIMIT 1000)`, nodeID, nodeID) + // Drive public status page: update node status + today's uptime record + today := time.Now().Format("2006-01-02") + db.Exec(`UPDATE nodes SET status='operational' WHERE id=?`, nodeID) + db.Exec(`INSERT OR REPLACE INTO uptime(node_id,date,status) VALUES(?,?,'operational')`, nodeID, today) + log.Printf("telemetry: %s (v%s) uptime=%ds cpu=%.1f%%", nodeID, p.Version, p.Uptime, p.System.CPUPercent) + w.Header().Set("Content-Type", "application/json") + fmt.Fprintln(w, `{"ok":true}`) + }) + + // Latest telemetry snapshot per node + http.HandleFunc("/api/telemetry", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.Header().Set("Cache-Control", "no-store, no-cache, must-revalidate") + rows, err := db.Query(` + SELECT t.node_id, t.received_at, t.version, t.hostname, t.uptime_seconds, + t.cpu_percent, t.memory_total_mb, t.memory_used_mb, + t.disk_total_mb, t.disk_used_mb, t.load_1m, + t.vault_count, t.vault_size_mb, t.vault_entries, t.mode + FROM telemetry t + INNER JOIN ( + SELECT node_id, MAX(id) AS max_id FROM telemetry GROUP BY node_id + ) latest ON t.id = latest.max_id + ORDER BY t.node_id + `) + if err != nil { + w.WriteHeader(500) + return + } + defer rows.Close() + type TRow struct { + NodeID string `json:"node_id"` + ReceivedAt int64 `json:"received_at"` + Version string `json:"version"` + Hostname string `json:"hostname"` + Uptime int64 `json:"uptime_seconds"` + CPUPct float64 `json:"cpu_percent"` + MemTotal int64 `json:"memory_total_mb"` + MemUsed int64 `json:"memory_used_mb"` + DiskTotal int64 `json:"disk_total_mb"` + DiskUsed int64 `json:"disk_used_mb"` + Load1m float64 `json:"load_1m"` + VaultCount int `json:"vault_count"` + VaultSize float64 `json:"vault_size_mb"` + VaultEntries int `json:"vault_entries"` + Mode string `json:"mode"` + } + var result []TRow + for rows.Next() { + var t TRow + rows.Scan(&t.NodeID, &t.ReceivedAt, &t.Version, &t.Hostname, &t.Uptime, + &t.CPUPct, &t.MemTotal, &t.MemUsed, + &t.DiskTotal, &t.DiskUsed, &t.Load1m, + &t.VaultCount, &t.VaultSize, &t.VaultEntries, &t.Mode) + result = append(result, t) + } + json.NewEncoder(w).Encode(map[string]interface{}{"telemetry": result}) + }) + + // Telemetry history for a node: /api/telemetry/history?node=virginia&limit=60 + http.HandleFunc("/api/telemetry/history", tailscaleOnly(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + nodeID := r.URL.Query().Get("node") + limit := 60 + if l := r.URL.Query().Get("limit"); l != "" { + fmt.Sscanf(l, "%d", &limit) + } + if limit > 1000 { limit = 1000 } + type TRow struct { + ReceivedAt int64 `json:"ts"` + CPUPct float64 `json:"cpu"` + MemUsedMB int64 `json:"mem_used_mb"` + MemTotalMB int64 `json:"mem_total_mb"` + DiskUsedMB int64 `json:"disk_used_mb"` + DiskTotalMB int64 `json:"disk_total_mb"` + Load1m float64 `json:"load_1m"` + UptimeSec int64 `json:"uptime_seconds"` + } + rows, err := db.Query(` + SELECT received_at, cpu_percent, memory_used_mb, memory_total_mb, + disk_used_mb, disk_total_mb, load_1m, uptime_seconds + FROM telemetry WHERE node_id=? + ORDER BY id DESC LIMIT ?`, nodeID, limit) + if err != nil { w.WriteHeader(500); return } + defer rows.Close() + var result []TRow + for rows.Next() { + var t TRow + rows.Scan(&t.ReceivedAt, &t.CPUPct, &t.MemUsedMB, &t.MemTotalMB, + &t.DiskUsedMB, &t.DiskTotalMB, &t.Load1m, &t.UptimeSec) + result = append(result, t) + } + // Reverse to chronological order + for i, j := 0, len(result)-1; i < j; i, j = i+1, j-1 { + result[i], result[j] = result[j], result[i] + } + json.NewEncoder(w).Encode(map[string]interface{}{"node": nodeID, "history": result}) + })) + + // Internal NOC ops dashboard — Tailscale only + http.HandleFunc("/ops", tailscaleOnly(handleOpsDashboard)) + + // POPs push metrics here + http.HandleFunc("/metrics", func(w http.ResponseWriter, r *http.Request) { + if r.Method != "POST" { + w.WriteHeader(405) + return + } + var p struct { + ID string `json:"id"` + Status string `json:"status"` + } + if err := json.NewDecoder(r.Body).Decode(&p); err != nil { + w.WriteHeader(400) + return + } + today := time.Now().Format("2006-01-02") + db.Exec(`INSERT OR REPLACE INTO uptime(node_id,date,status) VALUES(?,?,?)`, p.ID, today, p.Status) + db.Exec(`UPDATE nodes SET status=? WHERE id=?`, p.Status, p.ID) + log.Printf("metrics: %s → %s", p.ID, p.Status) + w.Header().Set("Content-Type", "application/json") + fmt.Fprintln(w, `{"ok":true}`) + }) + + // Start POP health monitor goroutine + go runHealthMonitor() + + log.Println("Listening on :8080") + log.Fatal(http.ListenAndServe(":8080", nil)) +} + +// runHealthMonitor posts to Kuma push URL every 60s and webhooks on problems. +// Configure via env: KUMA_PUSH_URL, ALERT_WEBHOOK_URL +func runHealthMonitor() { + kumaURL := os.Getenv("KUMA_PUSH_URL") + webhookURL := os.Getenv("ALERT_WEBHOOK_URL") + expectedNodes := []string{"virginia", "singapore", "zurich"} + staleThreshold := int64(300) // 5 min — Kuma fires after 2 missed 60s beats + alerting := false // track state to avoid webhook spam + + if kumaURL == "" { + log.Println("health monitor: KUMA_PUSH_URL not set — monitoring disabled") + return + } + log.Printf("health monitor: posting to Kuma every 60s, alert webhook=%v", webhookURL != "") + + ticker := time.NewTicker(60 * time.Second) + defer ticker.Stop() + + doPost := func(url, body string) { + req, _ := http.NewRequest("GET", url, nil) + if body != "" { + req, _ = http.NewRequest("POST", url, + strings.NewReader(body)) + req.Header.Set("Content-Type", "application/json") + } + http.DefaultClient.Timeout = 10 * time.Second + resp, err := http.DefaultClient.Do(req) + if err != nil { + log.Printf("health monitor: POST error: %v", err) + return + } + resp.Body.Close() + } + + for range ticker.C { + now := time.Now().Unix() + type row struct { + nodeID string + receivedAt int64 + } + rows, err := db.Query(` + SELECT t.node_id, t.received_at + FROM telemetry t + INNER JOIN (SELECT node_id, MAX(id) AS max_id FROM telemetry GROUP BY node_id) latest + ON t.id = latest.max_id + `) + if err != nil { + log.Printf("health monitor: db error: %v", err) + continue + } + + latest := map[string]int64{} + for rows.Next() { + var r row + rows.Scan(&r.nodeID, &r.receivedAt) + latest[r.nodeID] = r.receivedAt + } + rows.Close() + + var problems []string + for _, node := range expectedNodes { + ts, ok := latest[node] + if !ok { + problems = append(problems, node+": no data") + } else if now-ts > staleThreshold { + problems = append(problems, fmt.Sprintf("%s: silent %ds", node, now-ts)) + } + } + + if len(problems) > 0 { + // Don't push to Kuma — missing beat triggers Kuma alert + log.Printf("health monitor: PROBLEM — %v", problems) + if webhookURL != "" && !alerting { + alerting = true + body := fmt.Sprintf(`{"text":"⚠️ vault1984 POP alert\n%s"}`, + strings.Join(problems, "\\n")) + doPost(webhookURL, body) + } + } else { + // All healthy — push heartbeat to Kuma + alerting = false + oldest := int64(0) + for _, ts := range latest { + if age := now - ts; age > oldest { + oldest = age + } + } + pushURL := fmt.Sprintf("%s?status=up&msg=OK+oldest=%ds&ping=%d", kumaURL, oldest, oldest) + doPost(pushURL, "") + log.Printf("health monitor: OK — pushed to Kuma (oldest beat %ds)", oldest) + } + } +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..9cc47ab --- /dev/null +++ b/go.mod @@ -0,0 +1,18 @@ +module vault1984-chat + +go 1.24.0 + +require ( + github.com/dustin/go-humanize v1.0.1 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/gorilla/websocket v1.5.3 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/ncruces/go-strftime v1.0.0 // indirect + github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect + golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 // indirect + golang.org/x/sys v0.37.0 // indirect + modernc.org/libc v1.67.6 // indirect + modernc.org/mathutil v1.7.1 // indirect + modernc.org/memory v1.11.0 // indirect + modernc.org/sqlite v1.46.1 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..9fe1ea7 --- /dev/null +++ b/go.sum @@ -0,0 +1,25 @@ +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg= +github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w= +github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= +golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 h1:mgKeJMpvi0yx/sU5GsxQ7p6s2wtOnGAHZWCHUM4KGzY= +golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546/go.mod h1:j/pmGrbnkbPtQfxEe5D0VQhZC6qKbfKifgD0oM7sR70= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ= +golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +modernc.org/libc v1.67.6 h1:eVOQvpModVLKOdT+LvBPjdQqfrZq+pC39BygcT+E7OI= +modernc.org/libc v1.67.6/go.mod h1:JAhxUVlolfYDErnwiqaLvUqc8nfb2r6S6slAgZOnaiE= +modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU= +modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg= +modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI= +modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw= +modernc.org/sqlite v1.46.1 h1:eFJ2ShBLIEnUWlLy12raN0Z1plqmFX9Qe3rjQTKt6sU= +modernc.org/sqlite v1.46.1/go.mod h1:CzbrU2lSB1DKUusvwGz7rqEKIq+NUd8GWuBBZDs9/nA= diff --git a/scripts/deploy-pop.sh b/scripts/deploy-pop.sh new file mode 100755 index 0000000..beddbe5 --- /dev/null +++ b/scripts/deploy-pop.sh @@ -0,0 +1,174 @@ +#!/bin/bash +# vault1984 POP Deploy Script — canonical, idempotent +# Usage: ./deploy-pop.sh +# Example: ./deploy-pop.sh virginia us-east-1 +set -euo pipefail + +NODE_ID="${1:?usage: deploy-pop.sh }" +REGION="${2:?usage: deploy-pop.sh }" +HQ_URL="http://185.218.204.47:8080" +IAM_PROFILE="vault1984-ssm-profile" +INSTANCE_TYPE="t4g.micro" + +echo "=== vault1984 POP Deploy: $NODE_ID in $REGION ===" + +# 1. Find latest AL2 arm64 AMI +echo "[1/6] Finding AMI..." +AMI_ID=$(aws --region "$REGION" ec2 describe-images \ + --owners amazon \ + --filters "Name=name,Values=amzn2-ami-kernel-5.10-hvm-*-arm64-gp2" "Name=state,Values=available" \ + --query "sort_by(Images, &CreationDate)[-1].ImageId" --output text) +echo " AMI: $AMI_ID" + +# 2. Get/create vault1984-pop security group (outbound only, NO inbound) +echo "[2/6] Security group..." +VPC_ID=$(aws --region "$REGION" ec2 describe-vpcs \ + --filters "Name=isDefault,Values=true" --query "Vpcs[0].VpcId" --output text) +SUBNET_ID=$(aws --region "$REGION" ec2 describe-subnets \ + --filters "Name=defaultForAz,Values=true" --query "Subnets[0].SubnetId" --output text) + +# Look up existing SG — treat empty or literal "None" as missing +SG_ID=$(aws --region "$REGION" ec2 describe-security-groups \ + --filters "Name=group-name,Values=vault1984-pop" "Name=vpc-id,Values=$VPC_ID" \ + --query "SecurityGroups[0].GroupId" --output text 2>/dev/null || true) + +if [[ -z "$SG_ID" || "$SG_ID" == "None" ]]; then + echo " Creating vault1984-pop SG..." + SG_ID=$(aws --region "$REGION" ec2 create-security-group \ + --group-name vault1984-pop \ + --description "Vault1984 POP - outbound only, no inbound" \ + --vpc-id "$VPC_ID" --query "GroupId" --output text) + # Remove the default allow-all inbound rule AWS adds to new SGs + aws --region "$REGION" ec2 revoke-security-group-ingress \ + --group-id "$SG_ID" --protocol -1 --cidr 0.0.0.0/0 2>/dev/null || true + aws --region "$REGION" ec2 revoke-security-group-ingress \ + --group-id "$SG_ID" --protocol -1 --source-group "$SG_ID" 2>/dev/null || true + echo " Created SG: $SG_ID" +else + echo " Existing SG: $SG_ID" +fi + +# Verify SG has no inbound rules (safety check) +INBOUND=$(aws --region "$REGION" ec2 describe-security-groups \ + --group-ids "$SG_ID" --query "SecurityGroups[0].IpPermissions" --output text) +if [[ -n "$INBOUND" ]]; then + echo " WARNING: vault1984-pop SG has inbound rules — check manually:" + echo " $INBOUND" +fi + +# 3. Launch instance +echo "[3/6] Launching $INSTANCE_TYPE..." +INSTANCE_ID=$(aws --region "$REGION" ec2 run-instances \ + --image-id "$AMI_ID" \ + --instance-type "$INSTANCE_TYPE" \ + --subnet-id "$SUBNET_ID" \ + --security-group-ids "$SG_ID" \ + --iam-instance-profile Name="$IAM_PROFILE" \ + --tag-specifications "ResourceType=instance,Tags=[{Key=Name,Value=vault1984-$NODE_ID},{Key=vault1984-node,Value=$NODE_ID}]" \ + --query "Instances[0].InstanceId" --output text) +echo " Instance: $INSTANCE_ID" + +# Verify SG assignment immediately after launch +ASSIGNED_SG=$(aws --region "$REGION" ec2 describe-instances \ + --instance-ids "$INSTANCE_ID" \ + --query "Reservations[0].Instances[0].SecurityGroups[0].GroupName" --output text) +if [[ "$ASSIGNED_SG" != "vault1984-pop" ]]; then + echo "ERROR: Instance launched with wrong SG: $ASSIGNED_SG (expected vault1984-pop)" + echo " Terminating instance to avoid misconfigured POP." + aws --region "$REGION" ec2 terminate-instances --instance-ids "$INSTANCE_ID" >/dev/null + exit 1 +fi +echo " SG verified: $ASSIGNED_SG ✓" + +# 4. Wait for SSM +echo "[4/6] Waiting for SSM (~2-3 min)..." +PUBLIC_IP="" +for i in $(seq 1 30); do + STATUS=$(aws --region "$REGION" ssm describe-instance-information \ + --filters "Key=InstanceIds,Values=$INSTANCE_ID" \ + --query "InstanceInformationList[0].PingStatus" --output text 2>/dev/null || echo "None") + if [[ "$STATUS" == "Online" ]]; then + PUBLIC_IP=$(aws --region "$REGION" ec2 describe-instances \ + --instance-ids "$INSTANCE_ID" \ + --query "Reservations[0].Instances[0].PublicIpAddress" --output text) + echo " SSM online after ~${i}0s — IP: $PUBLIC_IP" + break + fi + echo " ($i/30) $STATUS..." + sleep 10 +done +[[ "$STATUS" == "Online" ]] || { echo "ERROR: SSM never came online"; exit 1; } + +# 5. Install vault1984 + apply hardening inline (same approach as update-pop.sh) +echo "[5/6] Deploying vault1984 and hardening..." + +CMD_ID=$(aws --region "$REGION" ssm send-command \ + --instance-ids "$INSTANCE_ID" \ + --document-name AWS-RunShellScript \ + --parameters commands="[ + \"set -e\", + \"hostnamectl set-hostname $NODE_ID\", + \"curl -sfo /usr/local/bin/vault1984 $HQ_URL/download/vault1984-arm64\", + \"chmod +x /usr/local/bin/vault1984\", + \"mkdir -p /var/lib/vault1984\", + \"printf '[Unit]\\nDescription=Vault1984\\nAfter=network.target\\n\\n[Service]\\nEnvironment=NODE_ID=$NODE_ID\\nExecStart=/usr/local/bin/vault1984 --telemetry-freq=60 --telemetry-host=$HQ_URL/telemetry\\nRestart=always\\nRestartSec=10\\nWorkingDirectory=/var/lib/vault1984\\n\\n[Install]\\nWantedBy=multi-user.target\\n' > /etc/systemd/system/vault1984.service\", + \"systemctl daemon-reload && systemctl enable vault1984 && systemctl start vault1984\", + \"systemctl is-active vault1984 && echo 'vault1984: OK' || echo 'vault1984: FAILED'\", + \"amazon-linux-extras install epel -y -q > /dev/null 2>&1 || true\", + \"yum install -y -q fail2ban > /dev/null 2>&1 || true\", + \"printf '[DEFAULT]\\nbantime = 86400\\nfindtime = 600\\nmaxretry = 3\\nignoreip = 127.0.0.1/8 ::1\\n\\n[sshd]\\nenabled = true\\nport = ssh\\nfilter = sshd\\nlogpath = /var/log/secure\\nmaxretry = 3\\nbantime = 86400\\n' > /etc/fail2ban/jail.local\", + \"systemctl enable fail2ban && systemctl restart fail2ban\", + \"sleep 5 && fail2ban-client status sshd\", + \"for svc in postfix rpcbind sshd; do systemctl stop \$svc 2>/dev/null; systemctl disable \$svc 2>/dev/null; done\", + \"yum install -y -q chrony > /dev/null 2>&1 && systemctl enable chronyd && systemctl start chronyd || true\", + \"printf 'net.ipv4.tcp_syncookies=1\\nnet.ipv4.icmp_echo_ignore_broadcasts=1\\nnet.ipv4.conf.all.rp_filter=1\\n' > /etc/sysctl.d/99-vault1984.conf && sysctl --system -q\", + \"systemctl enable firewalld && systemctl start firewalld\", + \"firewall-cmd --permanent --remove-service=ssh 2>/dev/null || true\", + \"firewall-cmd --permanent --add-port=1984/tcp && firewall-cmd --reload\", + \"echo 'DEPLOY_COMPLETE'\" + ]" \ + --query "Command.CommandId" --output text) + +for i in $(seq 1 30); do + R=$(aws --region "$REGION" ssm get-command-invocation \ + --command-id "$CMD_ID" --instance-id "$INSTANCE_ID" \ + --query "{S:Status,O:StandardOutputContent,E:StandardErrorContent}" --output json 2>/dev/null || echo '{"S":"Pending"}') + S=$(echo "$R" | python3 -c "import json,sys; print(json.load(sys.stdin).get('S','?'))") + if [[ "$S" == "Success" || "$S" == "Failed" || "$S" == "TimedOut" ]]; then + echo "$R" | python3 -c "import json,sys; d=json.load(sys.stdin); print(d.get('O','')); e=d.get('E',''); e and print('STDERR:',e)" + [[ "$S" == "Success" ]] || { echo "ERROR: deploy command $S"; exit 1; } + break + fi + echo " ($i/30) $S..." + sleep 10 +done + +# 6. Final verification +echo "[6/6] Verification..." +VERIFY_ID=$(aws --region "$REGION" ssm send-command \ + --instance-ids "$INSTANCE_ID" \ + --document-name AWS-RunShellScript \ + --parameters commands="[ + \"echo 'hostname:' $(hostname)\", + \"systemctl is-active vault1984 && echo 'vault1984: active' || echo 'vault1984: FAILED'\", + \"systemctl is-active fail2ban && echo 'fail2ban: active' || echo 'fail2ban: FAILED'\", + \"fail2ban-client status sshd 2>/dev/null && echo 'fail2ban-sshd: OK' || echo 'fail2ban-sshd: FAILED'\", + \"firewall-cmd --list-ports 2>/dev/null | grep -q 1984 && echo 'firewall: OK' || echo 'firewall: FAILED'\", + \"systemctl is-active rpcbind 2>/dev/null && echo 'rpcbind: WARNING still active' || echo 'rpcbind: disabled OK'\" + ]" \ + --query "Command.CommandId" --output text) +sleep 10 +aws --region "$REGION" ssm get-command-invocation \ + --command-id "$VERIFY_ID" --instance-id "$INSTANCE_ID" \ + --query "StandardOutputContent" --output text + +echo "" +echo "=== Deploy complete ===" +echo " Node: $NODE_ID" +echo " Region: $REGION" +echo " Instance: $INSTANCE_ID" +echo " IP: $PUBLIC_IP" +echo " SG: vault1984-pop ($SG_ID)" +echo "" +echo "Add to POPS in update-pop.sh:" +echo " [\"$NODE_ID\"]=\"$REGION:$INSTANCE_ID\"" diff --git a/scripts/harden-pop.sh b/scripts/harden-pop.sh new file mode 100755 index 0000000..660a119 --- /dev/null +++ b/scripts/harden-pop.sh @@ -0,0 +1,85 @@ +#!/bin/bash +# Vault1984 POP Hardening Script — idempotent, run via SSM +# Usage: bash harden-pop.sh +# Can be re-run safely at any time to re-apply hardening. +set -euo pipefail + +echo "=== Vault1984 POP Hardening ===" + +# 1. Update system +echo "[1/8] Updating system..." +yum update -y -q + +# 2. Install fail2ban (via EPEL) +echo "[2/8] Installing fail2ban..." +amazon-linux-extras install epel -y -q 2>/dev/null || true +yum install -y -q fail2ban +systemctl enable fail2ban + +# 3. Configure fail2ban — sshd jail +# NOTE: jail section must be [sshd] (lowercase), not [ssHD] +echo "[3/8] Configuring fail2ban..." +cat > /etc/fail2ban/jail.local << 'EOF' +[DEFAULT] +bantime = 86400 +findtime = 600 +maxretry = 3 +ignoreip = 127.0.0.1/8 ::1 + +[sshd] +enabled = true +port = ssh +filter = sshd +logpath = /var/log/secure +maxretry = 3 +bantime = 86400 +EOF +systemctl restart fail2ban +sleep 2 +fail2ban-client status sshd + +# 4. NTP / timezone +echo "[4/8] Configuring NTP..." +timedatectl set-timezone UTC +yum install -y -q chrony +systemctl enable chronyd +systemctl start chronyd + +# 5. Disable unnecessary services (SSH not needed — managed via SSM only) +echo "[5/8] Disabling unnecessary services..." +for svc in postfix rpcbind rpcbind.socket sshd; do + systemctl stop "$svc" 2>/dev/null || true + systemctl disable "$svc" 2>/dev/null || true +done + +# 6. Kernel hardening +echo "[6/8] Kernel hardening..." +cat > /etc/sysctl.d/99-vault1984.conf << 'EOF' +net.ipv4.tcp_syncookies = 1 +net.ipv4.icmp_echo_ignore_broadcasts = 1 +net.ipv4.conf.all.rp_filter = 1 +net.ipv4.conf.default.rp_filter = 1 +EOF +sysctl --system -q + +# 7. Firewall — allow only vault1984 port (SSM doesn't need port 22) +echo "[7/8] Configuring firewall..." +systemctl enable firewalld +systemctl start firewalld +firewall-cmd --permanent --remove-service=ssh 2>/dev/null || true +firewall-cmd --permanent --remove-service=dhcpv6-client 2>/dev/null || true +firewall-cmd --permanent --add-port=1984/tcp +firewall-cmd --reload +firewall-cmd --list-all + +# 8. sshd disabled — POPs are managed exclusively via AWS SSM +echo "[8/8] Disabling sshd (SSM-managed, no SSH needed)..." +systemctl stop sshd 2>/dev/null || true +systemctl disable sshd 2>/dev/null || true + +echo "" +echo "=== Hardening complete ===" +echo " fail2ban: $(fail2ban-client status | grep 'Jail list' | sed 's/.*Jail list:\s*//')" +echo " firewall: $(firewall-cmd --list-ports)" +echo " rpcbind: $(systemctl is-active rpcbind 2>/dev/null || echo inactive)" +echo " timezone: $(timedatectl | grep 'Time zone')" diff --git a/scripts/update-pop.sh b/scripts/update-pop.sh new file mode 100755 index 0000000..d73c85d --- /dev/null +++ b/scripts/update-pop.sh @@ -0,0 +1,185 @@ +#!/bin/bash +# vault1984 POP Update Script — push latest binary + optional re-harden +# Usage: ./update-pop.sh [--harden] [ ] +# --harden Also re-apply hardening (fail2ban, firewall, sysctl, etc.) +# no args Update all known POPs (binary + service only) +# Example: ./update-pop.sh --harden virginia us-east-1 i-01613f301bc47418e +# ./update-pop.sh --harden (all POPs) +set -euo pipefail + +HQ_URL="http://185.218.204.47:8080" +APPLY_HARDEN=false +HARDEN_SCRIPT="$(dirname "$0")/harden-pop.sh" + +# Known POPs — update this list as new POPs are added +declare -A POPS=( + ["virginia"]="us-east-1:i-01613f301bc47418e" + ["singapore"]="ap-southeast-1:i-03285633c3dcc64e1" + ["zurich"]="eu-central-2:i-0b907cebe7978c7c3" + ["saopaulo"]="sa-east-1:i-06485da3657e4a89b" +) + +# Parse args +ARGS=() +for arg in "$@"; do + [[ "$arg" == "--harden" ]] && APPLY_HARDEN=true || ARGS+=("$arg") +done +set -- "${ARGS[@]+"${ARGS[@]}"}" + +ssm_run() { + local region=$1 iid=$2 + shift 2 + local cmds=("$@") + local json_cmds + json_cmds=$(python3 -c "import json,sys; print(json.dumps(sys.argv[1:]))" "${cmds[@]}") + aws --region "$region" ssm send-command \ + --instance-ids "$iid" \ + --document-name AWS-RunShellScript \ + --parameters "commands=$json_cmds" \ + --query "Command.CommandId" --output text +} + +ssm_wait() { + local region=$1 iid=$2 cmd_id=$3 label=$4 + for i in $(seq 1 18); do + R=$(aws --region "$region" ssm get-command-invocation \ + --command-id "$cmd_id" --instance-id "$iid" \ + --query "{S:Status,O:StandardOutputContent,E:StandardErrorContent}" --output json 2>/dev/null || echo '{"S":"Pending"}') + S=$(echo "$R" | python3 -c "import json,sys; print(json.load(sys.stdin).get('S','?'))") + if [[ "$S" == "Success" || "$S" == "Failed" || "$S" == "TimedOut" ]]; then + echo "$R" | python3 -c " +import json,sys +d=json.load(sys.stdin) +out=d.get('O','').strip() +err=d.get('E','').strip() +if out: print(out) +if err: print('STDERR:', err) +" + [[ "$S" == "Success" ]] && return 0 || { echo "FAILED ($S)"; return 1; } + fi + echo " ($i) $S..." + sleep 10 + done + echo "TIMEOUT"; return 1 +} + +verify_sg() { + local region=$1 iid=$2 node=$3 + ASSIGNED=$(aws --region "$region" ec2 describe-instances \ + --instance-ids "$iid" \ + --query "Reservations[0].Instances[0].SecurityGroups[0].GroupName" --output text 2>/dev/null || echo "unknown") + if [[ "$ASSIGNED" != "vault1984-pop" ]]; then + echo " ⚠️ SG mismatch on $node: got '$ASSIGNED', expected 'vault1984-pop'" + echo " Attempting to reconcile..." + # Get the correct vault1984-pop SG ID for this region + VPC_ID=$(aws --region "$region" ec2 describe-vpcs \ + --filters "Name=isDefault,Values=true" --query "Vpcs[0].VpcId" --output text) + SG_ID=$(aws --region "$region" ec2 describe-security-groups \ + --filters "Name=group-name,Values=vault1984-pop" "Name=vpc-id,Values=$VPC_ID" \ + --query "SecurityGroups[0].GroupId" --output text 2>/dev/null || echo "None") + if [[ -z "$SG_ID" || "$SG_ID" == "None" ]]; then + echo " vault1984-pop SG doesn't exist in $region — creating..." + SG_ID=$(aws --region "$region" ec2 create-security-group \ + --group-name vault1984-pop \ + --description "Vault1984 POP - outbound only, no inbound" \ + --vpc-id "$VPC_ID" --query "GroupId" --output text) + aws --region "$region" ec2 revoke-security-group-ingress \ + --group-id "$SG_ID" --protocol -1 --cidr 0.0.0.0/0 2>/dev/null || true + aws --region "$region" ec2 revoke-security-group-ingress \ + --group-id "$SG_ID" --protocol -1 --source-group "$SG_ID" 2>/dev/null || true + echo " Created SG: $SG_ID" + fi + aws --region "$region" ec2 modify-instance-attribute \ + --instance-id "$iid" --groups "$SG_ID" + echo " ✓ SG reconciled → vault1984-pop ($SG_ID)" + else + echo " ✓ SG: vault1984-pop" + fi +} + +update_pop() { + local NODE_ID="$1" REGION="$2" INSTANCE_ID="$3" + echo "" + echo "━━━ $NODE_ID ($REGION / $INSTANCE_ID) ━━━" + + # 1. SG verification + reconciliation + echo " Verifying security group..." + verify_sg "$REGION" "$INSTANCE_ID" "$NODE_ID" + + # 2. Binary + service update + echo " Updating vault1984 binary..." + CMD_ID=$(ssm_run "$REGION" "$INSTANCE_ID" \ + "set -e" \ + "curl -sfo /usr/local/bin/vault1984.new $HQ_URL/download/vault1984-arm64" \ + "chmod +x /usr/local/bin/vault1984.new" \ + "mv /usr/local/bin/vault1984.new /usr/local/bin/vault1984" \ + "printf '[Unit]\nDescription=Vault1984\nAfter=network.target\n\n[Service]\nEnvironment=NODE_ID=$NODE_ID\nExecStart=/usr/local/bin/vault1984 --telemetry-freq=60 --telemetry-host=$HQ_URL/telemetry\nRestart=always\nRestartSec=10\nWorkingDirectory=/var/lib/vault1984\n\n[Install]\nWantedBy=multi-user.target\n' > /etc/systemd/system/vault1984.service" \ + "mkdir -p /var/lib/vault1984" \ + "systemctl stop v1984-agent 2>/dev/null || true" \ + "systemctl disable v1984-agent 2>/dev/null || true" \ + "systemctl daemon-reload && systemctl enable vault1984 && systemctl restart vault1984" \ + "sleep 2" \ + "systemctl is-active vault1984 && echo 'vault1984: OK' || echo 'vault1984: FAILED'") + ssm_wait "$REGION" "$INSTANCE_ID" "$CMD_ID" "binary update" + + # 3. Hardening re-apply (optional, --harden flag) + if $APPLY_HARDEN; then + echo " Re-applying hardening..." + HARDEN_CONTENT=$(cat "$HARDEN_SCRIPT") + CMD_ID=$(aws --region "$REGION" ssm send-command \ + --instance-ids "$INSTANCE_ID" \ + --document-name AWS-RunShellScript \ + --parameters "commands=[\"bash -s << 'HARDEN'\\n$(echo "$HARDEN_CONTENT" | python3 -c "import sys; print(sys.stdin.read().replace('\\\\', '\\\\\\\\').replace('\"', '\\\\\"'))")\\nHARDEN\"]" \ + --query "Command.CommandId" --output text 2>/dev/null || true) + # Simpler approach: write script then run it + CMD_ID=$(aws --region "$REGION" ssm send-command \ + --instance-ids "$INSTANCE_ID" \ + --document-name AWS-RunShellScript \ + --parameters commands="[ + \"amazon-linux-extras install epel -y -q > /dev/null 2>&1 || true\", + \"yum install -y -q fail2ban > /dev/null 2>&1 || true\", + \"printf '[DEFAULT]\\nbantime = 86400\\nfindtime = 600\\nmaxretry = 3\\nignoreip = 127.0.0.1/8 ::1\\n\\n[sshd]\\nenabled = true\\nport = ssh\\nfilter = sshd\\nlogpath = /var/log/secure\\nmaxretry = 3\\nbantime = 86400\\n' > /etc/fail2ban/jail.local\", + \"systemctl enable fail2ban && systemctl restart fail2ban\", + \"sleep 5 && fail2ban-client status sshd\", + \"for svc in postfix rpcbind sshd; do systemctl stop \$svc 2>/dev/null; systemctl disable \$svc 2>/dev/null; done; echo 'services disabled'\", + \"echo 'Hardening applied'\" + ]" \ + --query "Command.CommandId" --output text) + ssm_wait "$REGION" "$INSTANCE_ID" "$CMD_ID" "hardening" + fi + + # 4. Verify health + echo " Verifying node health..." + CMD_ID=$(ssm_run "$REGION" "$INSTANCE_ID" \ + "echo '--- vault1984 ---'" \ + "systemctl is-active vault1984 && echo 'OK' || echo 'FAILED'" \ + "echo '--- fail2ban ---'" \ + "systemctl is-active fail2ban && echo 'OK' || echo 'FAILED'" \ + "echo '--- fail2ban jails ---'" \ + "fail2ban-client status 2>/dev/null || echo 'no jails'" \ + "echo '--- sshd ---'" \ + "systemctl is-active sshd 2>/dev/null && echo 'WARNING: sshd still active' || echo 'disabled OK'" \ + "echo '--- rpcbind ---'" \ + "systemctl is-active rpcbind 2>/dev/null && echo 'WARNING: still active' || echo 'disabled OK'" \ + "echo '--- open ports ---'" \ + "ss -tlnp | grep LISTEN") + ssm_wait "$REGION" "$INSTANCE_ID" "$CMD_ID" "verify" + + echo " ✓ $NODE_ID done" +} + +# Main +if [ ${#ARGS[@]} -eq 3 ]; then + update_pop "${ARGS[0]}" "${ARGS[1]}" "${ARGS[2]}" +elif [ ${#ARGS[@]} -eq 0 ]; then + echo "=== vault1984 POP Update — all nodes$(${APPLY_HARDEN} && echo ' (+harden)' || echo '') ===" + for NODE_ID in "${!POPS[@]}"; do + IFS=: read -r REGION INSTANCE_ID <<< "${POPS[$NODE_ID]}" + update_pop "$NODE_ID" "$REGION" "$INSTANCE_ID" + done + echo "" + echo "=== All POPs updated ===" +else + echo "Usage: $0 [--harden] [ ]" + exit 1 +fi