package main import ( "bytes" "database/sql" "encoding/json" "fmt" "html/template" "io" "log" "net/http" "os" "sort" "strings" "time" _ "modernc.org/sqlite" ) const ( fireworksKey = "fw_RVcDe4c6mN4utKLsgA7hTm" fireworksModel = "accounts/fireworks/models/gpt-oss-20b" fireworksURL = "https://api.fireworks.ai/inference/v1/chat/completions" ) var systemPrompt = `You are the Vault1984 NOC assistant. Vault1984 is a global network of security POPs (Points of Presence) across 21 regions. Your job is to: - Help users report network problems or outages - Answer questions about Vault1984 infrastructure status - Collect details about issues (region, symptoms, timestamps) - Be concise and professional If someone reports a problem, acknowledge it, collect: affected region, what they observed, and when it started. Keep responses short.` const version = "v0.4" const ( colorGreen = "#76AD2A" colorOrange = "#E86235" colorRed = "#E04343" colorGray = "#D8D6D0" ) // Schema const schema = ` CREATE TABLE IF NOT EXISTS nodes ( id TEXT PRIMARY KEY, name TEXT NOT NULL, region TEXT NOT NULL, ip TEXT NOT NULL DEFAULT '', status TEXT NOT NULL DEFAULT 'planned' ); CREATE TABLE IF NOT EXISTS uptime ( node_id TEXT NOT NULL, date TEXT NOT NULL, -- YYYY-MM-DD status TEXT NOT NULL, -- operational | degraded | outage | planned PRIMARY KEY (node_id, date) ); CREATE TABLE IF NOT EXISTS incidents ( id INTEGER PRIMARY KEY AUTOINCREMENT, title TEXT NOT NULL, status TEXT NOT NULL, -- resolved | monitoring | investigating date TEXT NOT NULL, -- display date e.g. "Mar 4, 2026" node_ids TEXT NOT NULL DEFAULT '', created_at INTEGER NOT NULL DEFAULT (strftime('%s','now')) ); CREATE TABLE IF NOT EXISTS incident_updates ( id INTEGER PRIMARY KEY AUTOINCREMENT, incident_id INTEGER NOT NULL REFERENCES incidents(id), ts TEXT NOT NULL, -- ISO8601 e.g. "2026-03-04 16:08 UTC" status TEXT NOT NULL, -- investigating | identified | monitoring | resolved body TEXT NOT NULL ); CREATE TABLE IF NOT EXISTS telemetry ( id INTEGER PRIMARY KEY AUTOINCREMENT, node_id TEXT NOT NULL, received_at INTEGER NOT NULL DEFAULT (strftime('%s','now')), version TEXT NOT NULL DEFAULT '', hostname TEXT NOT NULL DEFAULT '', uptime_seconds INTEGER NOT NULL DEFAULT 0, cpu_percent REAL NOT NULL DEFAULT 0, memory_total_mb INTEGER NOT NULL DEFAULT 0, memory_used_mb INTEGER NOT NULL DEFAULT 0, disk_total_mb INTEGER NOT NULL DEFAULT 0, disk_used_mb INTEGER NOT NULL DEFAULT 0, load_1m REAL NOT NULL DEFAULT 0, vault_count INTEGER NOT NULL DEFAULT 0, vault_size_mb REAL NOT NULL DEFAULT 0, vault_entries INTEGER NOT NULL DEFAULT 0, mode TEXT NOT NULL DEFAULT '' ); ` type Node struct { ID string Name string Region string IP string Status string History []DayStatus } type DayStatus struct { Date string Status string } type IncidentUpdate struct { TS string Status string Body string } type Incident struct { ID int64 Title string Status string Date string NodeIDs string Updates []IncidentUpdate } type PageData struct { UpdatedAt string OverallOK bool Nodes []*Node Incidents []Incident } func (n *Node) UptimePct() string { if n.Status == "planned" { return "—" } total, ok := 0, 0 for _, h := range n.History { if h.Status == "planned" || h.Status == "nodata" { continue } total++ if h.Status == "operational" { ok++ } } if total == 0 { return "—" } return fmt.Sprintf("%.2f%%", float64(ok)/float64(total)*100) } func (n *Node) SVGBars() template.HTML { var sb strings.Builder sb.WriteString(``) for i, day := range n.History { x := i * 5 color := colorGray switch day.Status { case "operational": color = colorGreen case "degraded": color = colorOrange case "outage": color = colorRed case "nodata": color = "none" } title := fmt.Sprintf("%s: %s", day.Date, day.Status) sb.WriteString(fmt.Sprintf( `%s`, x, color, title, )) } sb.WriteString(``) return template.HTML(sb.String()) } func (n *Node) StatusLabel() string { switch n.Status { case "operational": return "Operational" case "degraded": return "Degraded Performance" case "outage": return "Major Outage" case "planned": return "Planned" } return "Unknown" } func (n *Node) StatusColor() string { switch n.Status { case "operational": return colorGreen case "degraded": return colorOrange case "outage": return colorRed } return colorGray } func nodeOrder(n *Node) int { if n.ID == "hq-zurich" { return 0 } if n.Status == "operational" { return 1 } return 2 } var db *sql.DB // seedData populates the DB on first run func seedData() { // Insert nodes nodeList := []struct{ id, name, region, ip, status string }{ {"virginia", "Virginia", "us-east-1", "18.209.55.127", "operational"}, {"singapore", "Singapore", "ap-southeast-1", "47.129.4.217", "operational"}, {"zurich", "Zürich", "eu-central-2", "16.18.20.81", "operational"}, {"ncalifornia", "N. California", "us-west-1", "", "planned"}, {"montreal", "Montreal", "ca-central-1", "", "planned"}, {"mexicocity", "Mexico City", "mx-central-1", "", "planned"}, {"saopaulo", "São Paulo", "sa-east-1", "", "planned"}, {"london", "London", "eu-west-2", "", "planned"}, {"paris", "Paris", "eu-west-3", "", "planned"}, {"spain", "Spain", "eu-south-2", "", "planned"}, {"stockholm", "Stockholm", "eu-north-1", "", "planned"}, {"uae", "UAE", "me-central-1", "", "planned"}, {"telaviv", "Tel Aviv", "il-central-1", "", "planned"}, {"capetown", "Cape Town", "af-south-1", "", "planned"}, {"mumbai", "Mumbai", "ap-south-1", "", "planned"}, {"jakarta", "Jakarta", "ap-southeast-3", "", "planned"}, {"malaysia", "Malaysia", "ap-southeast-5", "", "planned"}, {"sydney", "Sydney", "ap-southeast-2", "", "planned"}, {"seoul", "Seoul", "ap-northeast-2", "", "planned"}, {"hongkong", "Hong Kong", "ap-east-1", "", "planned"}, {"tokyo", "Tokyo", "ap-northeast-1", "", "planned"}, } for _, n := range nodeList { db.Exec(`INSERT OR IGNORE INTO nodes(id,name,region,ip,status) VALUES(?,?,?,?,?)`, n.id, n.name, n.region, n.ip, n.status) } // Planned nodes: seed gray bars so the UI renders correctly var plannedIDs []string rows, _ := db.Query(`SELECT id FROM nodes WHERE status='planned'`) for rows.Next() { var id string rows.Scan(&id) plannedIDs = append(plannedIDs, id) } rows.Close() now := time.Now() for _, nodeID := range plannedIDs { for i := 89; i >= 0; i-- { date := now.AddDate(0, 0, -i).Format("2006-01-02") db.Exec(`INSERT OR IGNORE INTO uptime(node_id,date,status) VALUES(?,?,?)`, nodeID, date, "planned") } } } func loadNodes() []*Node { rows, err := db.Query(`SELECT id,name,region,ip,status FROM nodes`) if err != nil { log.Printf("loadNodes error: %v", err) return nil } defer rows.Close() var nodes []*Node for rows.Next() { n := &Node{} rows.Scan(&n.ID, &n.Name, &n.Region, &n.IP, &n.Status) nodes = append(nodes, n) } // Load 90-day history for each node now := time.Now() for _, n := range nodes { n.History = make([]DayStatus, 90) // Build date map from DB histMap := map[string]string{} hrows, _ := db.Query(`SELECT date,status FROM uptime WHERE node_id=? ORDER BY date`, n.ID) for hrows.Next() { var date, status string hrows.Scan(&date, &status) histMap[date] = status } hrows.Close() for i := 0; i < 90; i++ { date := now.AddDate(0, 0, -(89 - i)).Format("2006-01-02") status, ok := histMap[date] if !ok { if n.Status == "planned" { status = "planned" } else { status = "nodata" // live node, no record yet — render transparent } } n.History[i] = DayStatus{Date: date, Status: status} } } sort.SliceStable(nodes, func(i, j int) bool { oi, oj := nodeOrder(nodes[i]), nodeOrder(nodes[j]) if oi != oj { return oi < oj } return nodes[i].Name < nodes[j].Name }) return nodes } func loadIncidents() []Incident { rows, err := db.Query(`SELECT id,title,status,date,node_ids FROM incidents ORDER BY created_at DESC`) if err != nil { return nil } defer rows.Close() var list []Incident for rows.Next() { var inc Incident rows.Scan(&inc.ID, &inc.Title, &inc.Status, &inc.Date, &inc.NodeIDs) list = append(list, inc) } rows.Close() // Load updates for each incident for i := range list { urows, err := db.Query( `SELECT ts,status,body FROM incident_updates WHERE incident_id=? ORDER BY id ASC`, list[i].ID, ) if err != nil { continue } for urows.Next() { var u IncidentUpdate urows.Scan(&u.TS, &u.Status, &u.Body) list[i].Updates = append(list[i].Updates, u) } urows.Close() } return list } func buildPage() PageData { nodes := loadNodes() ok := true for _, n := range nodes { if n.Status == "outage" || n.Status == "degraded" { ok = false break } } return PageData{ UpdatedAt: time.Now().UTC().Format("Jan 2, 2006 15:04 UTC"), OverallOK: ok, Nodes: nodes, Incidents: loadIncidents(), } } var tpl = template.Must(template.New("page").Parse(` Vault1984 Status
{{if .OverallOK}} {{else}} {{end}}
Uptime over the past 90 days.
Regional Points of Presence
{{range .Nodes}}
{{.Name}} {{.Region}} {{.StatusLabel}}
{{.SVGBars}}
90 days ago {{.UptimePct}} uptime Today
{{end}} {{if .Incidents}}
Past Incidents
{{range .Incidents}}
{{.Title}} {{.Date}}
{{.Status}} {{if .NodeIDs}}

Affected: {{.NodeIDs}}

{{end}} {{if .Updates}}
{{range .Updates}}
{{.TS}}
{{.Status}}
{{.Body}}
{{end}}
{{end}}
{{end}}
{{end}}
🛡 Vault1984 Support
Hi! Report a network issue or ask about our infrastructure. Which region are you seeing problems with?
`)) // tailscaleOnly allows only requests from Tailscale (100.64.0.0/10) or localhost func tailscaleOnly(next http.HandlerFunc) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { ip := r.RemoteAddr if i := strings.LastIndex(ip, ":"); i >= 0 { ip = ip[:i] } ip = strings.Trim(ip, "[]") allowed := ip == "127.0.0.1" || ip == "::1" || strings.HasPrefix(ip, "100.") if !allowed { http.Error(w, "Forbidden", 403) return } next(w, r) } } func handleOpsDashboard(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "text/html; charset=utf-8") fmt.Fprint(w, opsDashboardHTML) } const opsDashboardHTML = ` vault1984 NOC

⚡ vault1984 NOC

loading...
` func handleAdminGUI(w http.ResponseWriter, r *http.Request) { nodes := loadNodes() incidents := loadIncidents() w.Header().Set("Content-Type", "text/html; charset=utf-8") adminTpl.Execute(w, map[string]interface{}{ "Nodes": nodes, "Incidents": incidents, "Now": time.Now().UTC().Format("Jan 2, 2006 15:04 UTC"), }) } func handleAdminNewIncident(w http.ResponseWriter, r *http.Request) { if r.Method != "POST" { w.WriteHeader(405); return } var p struct { Title string `json:"title"` Status string `json:"status"` Body string `json:"body"` NodeIDs string `json:"node_ids"` } decode(r, &p) date := time.Now().UTC().Format("Jan 2, 2006") res, err := db.Exec(`INSERT INTO incidents(title,status,date,node_ids) VALUES(?,?,?,?)`, p.Title, p.Status, date, p.NodeIDs) if err != nil { jsonErr(w, err); return } id, _ := res.LastInsertId() ts := time.Now().UTC().Format("Jan 2, 2006 15:04 UTC") db.Exec(`INSERT INTO incident_updates(incident_id,ts,status,body) VALUES(?,?,?,?)`, id, ts, p.Status, p.Body) // Update affected nodes if p.NodeIDs != "" { for _, nid := range strings.Split(p.NodeIDs, ",") { nid = strings.TrimSpace(nid) if nid != "" { db.Exec(`UPDATE nodes SET status=? WHERE id=?`, p.Status, nid) } } } log.Printf("admin: new incident #%d: %s", id, p.Title) jsonOK(w) } func handleAdminUpdateIncident(w http.ResponseWriter, r *http.Request) { if r.Method != "POST" { w.WriteHeader(405); return } var p struct { IncidentID int64 `json:"incident_id"` Status string `json:"status"` Body string `json:"body"` } decode(r, &p) ts := time.Now().UTC().Format("Jan 2, 2006 15:04 UTC") db.Exec(`INSERT INTO incident_updates(incident_id,ts,status,body) VALUES(?,?,?,?)`, p.IncidentID, ts, p.Status, p.Body) db.Exec(`UPDATE incidents SET status=? WHERE id=?`, p.Status, p.IncidentID) log.Printf("admin: updated incident #%d → %s", p.IncidentID, p.Status) jsonOK(w) } func handleAdminNodeStatus(w http.ResponseWriter, r *http.Request) { if r.Method != "POST" { w.WriteHeader(405); return } var p struct { ID string `json:"id"` Status string `json:"status"` } decode(r, &p) today := time.Now().Format("2006-01-02") db.Exec(`UPDATE nodes SET status=? WHERE id=?`, p.Status, p.ID) db.Exec(`INSERT OR REPLACE INTO uptime(node_id,date,status) VALUES(?,?,?)`, p.ID, today, p.Status) log.Printf("admin: node %s → %s", p.ID, p.Status) jsonOK(w) } func decode(r *http.Request, v interface{}) { ct := r.Header.Get("Content-Type") if strings.Contains(ct, "application/json") { json.NewDecoder(r.Body).Decode(v) } else { r.ParseForm() // map form values into struct via JSON round-trip m := map[string]string{} for k, vs := range r.Form { if len(vs) > 0 { m[k] = vs[0] } } b, _ := json.Marshal(m) json.Unmarshal(b, v) } } func jsonOK(w http.ResponseWriter) { w.Header().Set("Content-Type", "application/json") fmt.Fprintln(w, `{"ok":true}`) } func jsonErr(w http.ResponseWriter, err error) { w.Header().Set("Content-Type", "application/json") w.WriteHeader(500) json.NewEncoder(w).Encode(map[string]string{"error": err.Error()}) } var adminTpl = template.Must(template.New("admin").Parse(` Vault1984 Admin

🛡 Vault1984 Admin

{{.Now}} • ← Public status page

Node Status

{{range .Nodes}}
{{.Name}}
{{.Region}}
{{end}}

Create Incident

Incident created ✓

Post Update to Incident

{{if .Incidents}}
Update posted ✓
{{else}}

No incidents yet.

{{end}}
`)) func handleChat(w http.ResponseWriter, r *http.Request) { if r.Method != "POST" { w.WriteHeader(405) return } var req struct { Messages []map[string]string `json:"messages"` } if err := json.NewDecoder(r.Body).Decode(&req); err != nil { w.WriteHeader(400) return } // Prepend system message msgs := append([]map[string]string{{"role": "system", "content": systemPrompt}}, req.Messages...) payload, _ := json.Marshal(map[string]interface{}{ "model": fireworksModel, "messages": msgs, "max_tokens": 300, "temperature": 0.7, }) fwReq, _ := http.NewRequest("POST", fireworksURL, bytes.NewReader(payload)) fwReq.Header.Set("Authorization", "Bearer "+fireworksKey) fwReq.Header.Set("Content-Type", "application/json") resp, err := http.DefaultClient.Do(fwReq) if err != nil { log.Printf("fireworks error: %v", err) http.Error(w, `{"error":"upstream error"}`, 502) return } defer resp.Body.Close() body, _ := io.ReadAll(resp.Body) var fw struct { Choices []struct { Message struct { Content string `json:"content"` } `json:"message"` } `json:"choices"` } if err := json.Unmarshal(body, &fw); err != nil || len(fw.Choices) == 0 { log.Printf("fireworks parse error: %s", body) http.Error(w, `{"error":"parse error"}`, 500) return } w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(map[string]string{ "reply": strings.TrimSpace(fw.Choices[0].Message.Content), }) } // OTLP JSON metric receiver func handleOTLP(w http.ResponseWriter, r *http.Request) { if r.Method != "POST" { w.WriteHeader(405) return } var payload struct { ResourceMetrics []struct { Resource struct { Attributes []struct { Key string `json:"key"` Value struct { StringValue string `json:"stringValue"` } `json:"value"` } `json:"attributes"` } `json:"resource"` ScopeMetrics []struct { Metrics []struct { Name string `json:"name"` Gauge struct { DataPoints []struct { AsDouble float64 `json:"asDouble"` } `json:"dataPoints"` } `json:"gauge"` } `json:"metrics"` } `json:"scopeMetrics"` } `json:"resourceMetrics"` } if err := json.NewDecoder(r.Body).Decode(&payload); err != nil { w.WriteHeader(400) log.Printf("OTLP decode error: %v", err) return } for _, rm := range payload.ResourceMetrics { // Extract node.id from resource attributes nodeID := "" for _, a := range rm.Resource.Attributes { if a.Key == "node.id" { nodeID = a.Value.StringValue } } if nodeID == "" { continue } // Extract metrics metrics := map[string]float64{} for _, sm := range rm.ScopeMetrics { for _, m := range sm.Metrics { if len(m.Gauge.DataPoints) > 0 { metrics[m.Name] = m.Gauge.DataPoints[0].AsDouble } } } cpu := metrics["system.cpu.utilization"] mem := metrics["system.memory.utilization"] disk := metrics["system.disk.utilization"] // Determine status from thresholds status := "operational" if cpu > 0.9 || mem > 0.9 || disk > 0.9 { status = "degraded" } // Update node status + today's uptime today := time.Now().Format("2006-01-02") db.Exec(`UPDATE nodes SET status=? WHERE id=?`, status, nodeID) db.Exec(`INSERT OR REPLACE INTO uptime(node_id,date,status) VALUES(?,?,?)`, nodeID, today, status) // Store latest metrics as JSON in a simple kv table (lazy: reuse uptime body col if available, or just log) log.Printf("OTLP [%s] cpu=%.1f%% mem=%.1f%% disk=%.1f%% → %s", nodeID, cpu*100, mem*100, disk*100, status) } w.Header().Set("Content-Type", "application/json") fmt.Fprintln(w, `{"partialSuccess":{}}`) } func main() { fmt.Printf("=== Vault1984 Dashboard %s ===\n", version) var err error db, err = sql.Open("sqlite", "./status.db") if err != nil { log.Fatalf("open db: %v", err) } db.SetMaxOpenConns(1) // SQLite: single writer if _, err := db.Exec(schema); err != nil { log.Fatalf("schema: %v", err) } seedData() log.Println("DB ready: status.db") http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "text/html; charset=utf-8") tpl.Execute(w, buildPage()) }) http.HandleFunc("/api/status", func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "application/json") nodes := loadNodes() ok := true for _, n := range nodes { if n.Status == "outage" || n.Status == "degraded" { ok = false break } } status := "operational" if !ok { status = "degraded" } json.NewEncoder(w).Encode(map[string]interface{}{ "status": status, "updated": time.Now().UTC().Format(time.RFC3339), "nodes": len(nodes), }) }) http.HandleFunc("/api/chat", handleChat) http.HandleFunc("/v1/metrics", handleOTLP) http.HandleFunc("/download/agent", func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Disposition", "attachment; filename=v1984-agent") w.Header().Set("Content-Type", "application/octet-stream") http.ServeFile(w, r, "./v1984-agent") }) http.HandleFunc("/download/agent-arm64", func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Disposition", "attachment; filename=v1984-agent") w.Header().Set("Content-Type", "application/octet-stream") http.ServeFile(w, r, "./v1984-agent-arm64") }) http.HandleFunc("/download/vault1984", func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Disposition", "attachment; filename=vault1984") w.Header().Set("Content-Type", "application/octet-stream") http.ServeFile(w, r, "./vault1984") }) http.HandleFunc("/download/vault1984-arm64", func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Disposition", "attachment; filename=vault1984") w.Header().Set("Content-Type", "application/octet-stream") http.ServeFile(w, r, "./vault1984-arm64") }) http.HandleFunc("/api/nodes", func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(map[string]interface{}{ "nodes": loadNodes(), "incidents": loadIncidents(), }) }) // Admin routes — Tailscale only http.HandleFunc("/admin", tailscaleOnly(handleAdminGUI)) http.HandleFunc("/admin/incident/new", tailscaleOnly(handleAdminNewIncident)) http.HandleFunc("/admin/incident/update", tailscaleOnly(handleAdminUpdateIncident)) http.HandleFunc("/admin/node/status", tailscaleOnly(handleAdminNodeStatus)) // vault1984 binary telemetry ingestion http.HandleFunc("/telemetry", func(w http.ResponseWriter, r *http.Request) { if r.Method != "POST" { w.WriteHeader(405) return } // Optional token auth: if TELEMETRY_TOKEN env is set, enforce it if tok := os.Getenv("TELEMETRY_TOKEN"); tok != "" { if r.Header.Get("X-Telemetry-Token") != tok && r.URL.Query().Get("token") != tok { w.WriteHeader(401) fmt.Fprintln(w, `{"error":"unauthorized"}`) return } } var p struct { Version string `json:"version"` Hostname string `json:"hostname"` Uptime int64 `json:"uptime_seconds"` System struct { CPUPercent float64 `json:"cpu_percent"` MemTotalMB int64 `json:"memory_total_mb"` MemUsedMB int64 `json:"memory_used_mb"` DiskTotalMB int64 `json:"disk_total_mb"` DiskUsedMB int64 `json:"disk_used_mb"` Load1m float64 `json:"load_1m"` } `json:"system"` Vaults struct { Count int `json:"count"` TotalSizeMB float64 `json:"total_size_mb"` TotalEntries int `json:"total_entries"` } `json:"vaults"` Mode string `json:"mode"` } if err := json.NewDecoder(r.Body).Decode(&p); err != nil { w.WriteHeader(400) fmt.Fprintln(w, `{"error":"bad request"}`) return } // Map hostname → node_id // Priority: hostname matches node id → hostname matches node name → source IP → hostname as-is nodeID := p.Hostname var knownID string // Try direct id match db.QueryRow(`SELECT id FROM nodes WHERE id=?`, nodeID).Scan(&knownID) if knownID == "" { // Try matching by hostname against node name (case-insensitive) db.QueryRow(`SELECT id FROM nodes WHERE lower(name)=lower(?) OR lower(id)=lower(?)`, nodeID, nodeID).Scan(&knownID) } if knownID == "" { // Try matching by source IP (strips port) remoteIP := r.RemoteAddr if host, _, ok := strings.Cut(remoteIP, ":"); ok { remoteIP = host } db.QueryRow(`SELECT id FROM nodes WHERE ip=?`, remoteIP).Scan(&knownID) } if knownID != "" { nodeID = knownID } db.Exec(`INSERT INTO telemetry(node_id,version,hostname,uptime_seconds,cpu_percent,memory_total_mb,memory_used_mb,disk_total_mb,disk_used_mb,load_1m,vault_count,vault_size_mb,vault_entries,mode) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?)`, nodeID, p.Version, p.Hostname, p.Uptime, p.System.CPUPercent, p.System.MemTotalMB, p.System.MemUsedMB, p.System.DiskTotalMB, p.System.DiskUsedMB, p.System.Load1m, p.Vaults.Count, p.Vaults.TotalSizeMB, p.Vaults.TotalEntries, p.Mode, ) // Keep last 1000 rows per node (scoped to node_id) db.Exec(`DELETE FROM telemetry WHERE node_id=? AND id NOT IN (SELECT id FROM telemetry WHERE node_id=? ORDER BY id DESC LIMIT 1000)`, nodeID, nodeID) // Drive public status page: update node status + today's uptime record today := time.Now().Format("2006-01-02") db.Exec(`UPDATE nodes SET status='operational' WHERE id=?`, nodeID) db.Exec(`INSERT OR REPLACE INTO uptime(node_id,date,status) VALUES(?,?,'operational')`, nodeID, today) log.Printf("telemetry: %s (v%s) uptime=%ds cpu=%.1f%%", nodeID, p.Version, p.Uptime, p.System.CPUPercent) w.Header().Set("Content-Type", "application/json") fmt.Fprintln(w, `{"ok":true}`) }) // Latest telemetry snapshot per node http.HandleFunc("/api/telemetry", func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "application/json") w.Header().Set("Cache-Control", "no-store, no-cache, must-revalidate") rows, err := db.Query(` SELECT t.node_id, t.received_at, t.version, t.hostname, t.uptime_seconds, t.cpu_percent, t.memory_total_mb, t.memory_used_mb, t.disk_total_mb, t.disk_used_mb, t.load_1m, t.vault_count, t.vault_size_mb, t.vault_entries, t.mode FROM telemetry t INNER JOIN ( SELECT node_id, MAX(id) AS max_id FROM telemetry GROUP BY node_id ) latest ON t.id = latest.max_id ORDER BY t.node_id `) if err != nil { w.WriteHeader(500) return } defer rows.Close() type TRow struct { NodeID string `json:"node_id"` ReceivedAt int64 `json:"received_at"` Version string `json:"version"` Hostname string `json:"hostname"` Uptime int64 `json:"uptime_seconds"` CPUPct float64 `json:"cpu_percent"` MemTotal int64 `json:"memory_total_mb"` MemUsed int64 `json:"memory_used_mb"` DiskTotal int64 `json:"disk_total_mb"` DiskUsed int64 `json:"disk_used_mb"` Load1m float64 `json:"load_1m"` VaultCount int `json:"vault_count"` VaultSize float64 `json:"vault_size_mb"` VaultEntries int `json:"vault_entries"` Mode string `json:"mode"` } var result []TRow for rows.Next() { var t TRow rows.Scan(&t.NodeID, &t.ReceivedAt, &t.Version, &t.Hostname, &t.Uptime, &t.CPUPct, &t.MemTotal, &t.MemUsed, &t.DiskTotal, &t.DiskUsed, &t.Load1m, &t.VaultCount, &t.VaultSize, &t.VaultEntries, &t.Mode) result = append(result, t) } json.NewEncoder(w).Encode(map[string]interface{}{"telemetry": result}) }) // Telemetry history for a node: /api/telemetry/history?node=virginia&limit=60 http.HandleFunc("/api/telemetry/history", tailscaleOnly(func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "application/json") nodeID := r.URL.Query().Get("node") limit := 60 if l := r.URL.Query().Get("limit"); l != "" { fmt.Sscanf(l, "%d", &limit) } if limit > 1000 { limit = 1000 } type TRow struct { ReceivedAt int64 `json:"ts"` CPUPct float64 `json:"cpu"` MemUsedMB int64 `json:"mem_used_mb"` MemTotalMB int64 `json:"mem_total_mb"` DiskUsedMB int64 `json:"disk_used_mb"` DiskTotalMB int64 `json:"disk_total_mb"` Load1m float64 `json:"load_1m"` UptimeSec int64 `json:"uptime_seconds"` } rows, err := db.Query(` SELECT received_at, cpu_percent, memory_used_mb, memory_total_mb, disk_used_mb, disk_total_mb, load_1m, uptime_seconds FROM telemetry WHERE node_id=? ORDER BY id DESC LIMIT ?`, nodeID, limit) if err != nil { w.WriteHeader(500); return } defer rows.Close() var result []TRow for rows.Next() { var t TRow rows.Scan(&t.ReceivedAt, &t.CPUPct, &t.MemUsedMB, &t.MemTotalMB, &t.DiskUsedMB, &t.DiskTotalMB, &t.Load1m, &t.UptimeSec) result = append(result, t) } // Reverse to chronological order for i, j := 0, len(result)-1; i < j; i, j = i+1, j-1 { result[i], result[j] = result[j], result[i] } json.NewEncoder(w).Encode(map[string]interface{}{"node": nodeID, "history": result}) })) // Internal NOC ops dashboard — Tailscale only http.HandleFunc("/ops", tailscaleOnly(handleOpsDashboard)) // POPs push metrics here http.HandleFunc("/metrics", func(w http.ResponseWriter, r *http.Request) { if r.Method != "POST" { w.WriteHeader(405) return } var p struct { ID string `json:"id"` Status string `json:"status"` } if err := json.NewDecoder(r.Body).Decode(&p); err != nil { w.WriteHeader(400) return } today := time.Now().Format("2006-01-02") db.Exec(`INSERT OR REPLACE INTO uptime(node_id,date,status) VALUES(?,?,?)`, p.ID, today, p.Status) db.Exec(`UPDATE nodes SET status=? WHERE id=?`, p.Status, p.ID) log.Printf("metrics: %s → %s", p.ID, p.Status) w.Header().Set("Content-Type", "application/json") fmt.Fprintln(w, `{"ok":true}`) }) // Start POP health monitor goroutine go runHealthMonitor() log.Println("Listening on :8080") log.Fatal(http.ListenAndServe(":8080", nil)) } // runHealthMonitor posts to Kuma push URL every 60s and webhooks on problems. // Configure via env: KUMA_PUSH_URL, ALERT_WEBHOOK_URL func runHealthMonitor() { kumaURL := os.Getenv("KUMA_PUSH_URL") webhookURL := os.Getenv("ALERT_WEBHOOK_URL") expectedNodes := []string{"virginia", "singapore", "zurich"} staleThreshold := int64(300) // 5 min — Kuma fires after 2 missed 60s beats alerting := false // track state to avoid webhook spam if kumaURL == "" { log.Println("health monitor: KUMA_PUSH_URL not set — monitoring disabled") return } log.Printf("health monitor: posting to Kuma every 60s, alert webhook=%v", webhookURL != "") ticker := time.NewTicker(60 * time.Second) defer ticker.Stop() doPost := func(url, body string) { req, _ := http.NewRequest("GET", url, nil) if body != "" { req, _ = http.NewRequest("POST", url, strings.NewReader(body)) req.Header.Set("Content-Type", "application/json") } http.DefaultClient.Timeout = 10 * time.Second resp, err := http.DefaultClient.Do(req) if err != nil { log.Printf("health monitor: POST error: %v", err) return } resp.Body.Close() } for range ticker.C { now := time.Now().Unix() type row struct { nodeID string receivedAt int64 } rows, err := db.Query(` SELECT t.node_id, t.received_at FROM telemetry t INNER JOIN (SELECT node_id, MAX(id) AS max_id FROM telemetry GROUP BY node_id) latest ON t.id = latest.max_id `) if err != nil { log.Printf("health monitor: db error: %v", err) continue } latest := map[string]int64{} for rows.Next() { var r row rows.Scan(&r.nodeID, &r.receivedAt) latest[r.nodeID] = r.receivedAt } rows.Close() var problems []string for _, node := range expectedNodes { ts, ok := latest[node] if !ok { problems = append(problems, node+": no data") } else if now-ts > staleThreshold { problems = append(problems, fmt.Sprintf("%s: silent %ds", node, now-ts)) } } if len(problems) > 0 { // Don't push to Kuma — missing beat triggers Kuma alert log.Printf("health monitor: PROBLEM — %v", problems) if webhookURL != "" && !alerting { alerting = true body := fmt.Sprintf(`{"text":"⚠️ vault1984 POP alert\n%s"}`, strings.Join(problems, "\\n")) doPost(webhookURL, body) } } else { // All healthy — push heartbeat to Kuma alerting = false oldest := int64(0) for _, ts := range latest { if age := now - ts; age > oldest { oldest = age } } pushURL := fmt.Sprintf("%s?status=up&msg=OK+oldest=%ds&ping=%d", kumaURL, oldest, oldest) doPost(pushURL, "") log.Printf("health monitor: OK — pushed to Kuma (oldest beat %ds)", oldest) } } }