clavitor/operations/pop-sync/main.go

2488 lines
69 KiB
Go

package main
import (
"bytes"
"context"
"database/sql"
"encoding/json"
"fmt"
"io"
"net/http"
"os"
"os/exec"
"path/filepath"
"strings"
"sync"
"time"
awsconfig "github.com/aws/aws-sdk-go-v2/config"
"github.com/aws/aws-sdk-go-v2/credentials"
"github.com/aws/aws-sdk-go-v2/service/ec2"
ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
"github.com/aws/aws-sdk-go-v2/service/ssm"
_ "modernc.org/sqlite"
)
// ---------------------------------------------------------------------------
// pop-sync — Clavitor fleet management tool
//
// Subcommands:
// sync Reconcile DNS + Tailscale to match the pops DB
// deploy Build clovis-vault for arm64, push to all live nodes, restart
// status Health-check all live nodes (Tailscale reachable, service up)
// exec Run a shell command on all (or specific) live nodes
//
// Designed for both human and agent use:
// --json Machine-readable JSON output
// Exit 0 Everything OK
// Exit 1 Partial failure (some nodes failed)
// Exit 2 Fatal / config error
// ---------------------------------------------------------------------------
// --- Types ---
// POP is one point-of-presence row from the pops table.
type POP struct {
	PopID      int    `json:"pop_id"`
	City       string `json:"city"`
	Country    string `json:"country"`
	RegionName string `json:"region_name"` // cloud region, e.g. an AWS region for Provider "aws"
	IP         string `json:"ip"`
	DNS        string `json:"dns"` // full hostname, e.g. "use1.clavitor.ai"
	Status     string `json:"status"`
	Provider   string `json:"provider"`
	InstanceID string `json:"instance_id"`
	Arch       string `json:"arch"` // "aarch64" or "x86_64"; empty treated as aarch64 by deploy
}

// Subdomain returns the host label of DNS (everything before the first dot),
// or the whole string when DNS contains no dot, or "" when DNS is empty.
func (p POP) Subdomain() string {
	// strings.Cut replaces the old SplitN + always-true len check.
	sub, _, _ := strings.Cut(p.DNS, ".")
	return sub
}

// Zone returns the DNS zone (everything after the first dot), or "" when
// DNS has no dot.
func (p POP) Zone() string {
	_, zone, _ := strings.Cut(p.DNS, ".")
	return zone
}
// Config holds all runtime settings for pop-sync, populated by parseFlags
// from CLI flags with environment-variable and literal fallbacks.
type Config struct {
	DBPath       string // path to the operations SQLite DB containing the pops table
	CFToken      string // Cloudflare API token (falls back to CF_API_TOKEN)
	TSKey        string // Tailscale API key (falls back to TS_API_KEY)
	TSAuthKey    string // Tailscale auth key for joining new nodes (falls back to TS_AUTHKEY)
	DryRun       bool   // report intended changes without applying them
	JSONOut      bool   // emit machine-readable JSON instead of human logs
	Zone         string // DNS zone, e.g. "clavitor.ai"
	CFZoneID     string // Cloudflare zone ID; resolved from Zone when empty
	VaultSrc     string // path to vault source (for local builds)
	VaultBinary  string // path to pre-built vault binary (takes precedence)
	Nodes        string // comma-separated node filter (empty = all)
	AWSKeyID     string // AWS access key ID (falls back to AWS_ACCESS_KEY_ID)
	AWSSecretKey string // AWS secret key (falls back to AWS_SECRET_ACCESS_KEY)
	ISHostingKey string // ishosting API key (falls back to ISHOSTING_API_KEY)
	CADir        string // path to CA directory for mTLS certs (default: ./ca)
}
// NodeResult is the per-node outcome of one action. It is serialized as
// JSON when -json is set; any result with OK=false makes exitWith exit 1.
type NodeResult struct {
	Node    string `json:"node"`              // node subdomain (or city, for provision errors)
	Action  string `json:"action"`            // e.g. "deploy", "status", "firewall"
	OK      bool   `json:"ok"`                // true when the action succeeded
	Message string `json:"message,omitempty"` // success detail
	Error   string `json:"error,omitempty"`   // failure detail when OK is false
}
// --- Main ---
// main dispatches on the first CLI argument (the subcommand), shifts it out
// of os.Args so parseFlags only sees flags and positionals, and exits with
// 0 (OK), 1 (partial failure, via exitWith) or 2 (fatal / usage error).
func main() {
	if len(os.Args) < 2 {
		printUsage()
		os.Exit(2)
	}
	cmd := os.Args[1]
	// Shift args so flags work after subcommand
	os.Args = append(os.Args[:1], os.Args[2:]...)
	cfg, remaining := parseFlags()
	switch cmd {
	case "sync":
		exitWith(cmdSync(cfg))
	case "deploy":
		exitWith(cmdDeploy(cfg))
	case "status":
		exitWith(cmdStatus(cfg))
	case "exec":
		if len(remaining) == 0 {
			fatal("usage: pop-sync exec <command>")
		}
		exitWith(cmdExec(cfg, strings.Join(remaining, " ")))
	case "firewall":
		exitWith(cmdFirewall(cfg))
	case "update":
		exitWith(cmdUpdate(cfg))
	case "provision":
		if len(remaining) == 0 {
			fatal("usage: pop-sync provision <city> [city2 ...] (e.g. pop-sync provision Tokyo Calgary)")
		}
		exitWith(cmdProvision(cfg, remaining))
	case "bootstrap":
		if len(remaining) < 2 {
			fatal("usage: pop-sync bootstrap <city> <ip> [root-password] (omit password if SSH key installed)")
		}
		password := ""
		if len(remaining) >= 3 {
			password = remaining[2]
		}
		exitWith(cmdBootstrap(cfg, remaining[0], remaining[1], password))
	case "maintenance":
		if len(remaining) == 0 {
			// FIX: advertise "status", which cmdMaintenance supports — keeps
			// this usage string consistent with cmdMaintenance's own.
			fatal("usage: pop-sync maintenance <on|off|status> [reason]")
		}
		cmdMaintenance(cfg, remaining)
	case "ca":
		if len(remaining) == 0 {
			fatal("usage: pop-sync ca <init|issue> [args...]")
		}
		switch remaining[0] {
		case "init":
			exitWith(cmdCAInit(cfg, remaining[1:]))
		case "issue":
			exitWith(cmdCAIssue(cfg, remaining[1:]))
		default:
			fatal("usage: pop-sync ca <init|issue> [args...]")
		}
	case "sync-pops":
		exitWith(cmdSyncPops(cfg))
	case "help", "--help", "-h":
		printUsage()
	default:
		fmt.Fprintf(os.Stderr, "unknown command: %s\n", cmd)
		printUsage()
		os.Exit(2)
	}
}
// printUsage writes the human-readable help text (commands, flags, examples)
// to stdout. Called for the help subcommand and on usage errors.
func printUsage() {
	fmt.Println(`pop-sync — Clavitor fleet management
Commands:
sync Reconcile DNS + Tailscale with pops DB (create, update, delete)
deploy Build clovis-vault (arm64), deploy to all live nodes, graceful restart
status Health-check all live nodes (reachable, service running, version)
exec Run a command on live nodes: pop-sync exec <command>
maintenance Toggle maintenance mode: pop-sync maintenance on "fleet deploy"
sync-pops Sync pops table from operations DB to corporate DB
ca Private CA operations for mTLS
ca init Initialize root + intermediate CA (one-time setup)
ca issue Issue POP certificate: ca issue --pop <region> --sans <dns,ip,...>
Flags:
-db Path to clavitor.db (default: ../clavitor.com/clavitor.db)
-cf-token Cloudflare API token (or CF_API_TOKEN env)
-ts-key Tailscale API key (or TS_API_KEY env)
-ts-authkey Tailscale auth key for joining new nodes (or TS_AUTHKEY env)
-binary Path to pre-built vault binary (skips local build)
-vault-src Path to clovis-vault source (default: ../clavis/clavis-vault)
-ca-dir Path to CA directory for mTLS certs (default: ./ca)
-zone DNS zone (default: clavitor.ai)
-nodes Comma-separated node filter, e.g. "use1,sg1" (default: all)
-dry-run Show what would change without doing it
-json Output results as JSON (agent-friendly)
CA Examples:
pop-sync ca init
pop-sync ca issue --pop use1 --sans use1.clavitor.ai,198.51.100.1
pop-sync ca issue --pop sg1 --sans sg1.clavitor.ai,sg1.pop.clavitor.ai,203.0.113.5
Examples:
pop-sync sync --dry-run
pop-sync deploy
pop-sync status --json
pop-sync exec "systemctl status clavitor"
pop-sync exec -nodes use1,sg1 "journalctl -u clavitor -n 20"`)
}
// parseFlags scans os.Args for known "-flag value" pairs and boolean
// switches, collecting everything else (positionals and unknown flags) into
// the returned remainder slice. After flag parsing, unset string settings
// fall back first to environment variables, then to literal defaults.
func parseFlags() (Config, []string) {
	var (
		cfg  Config
		rest []string
	)
	for i := 1; i < len(os.Args); i++ {
		arg := os.Args[i]
		if !strings.HasPrefix(arg, "-") {
			rest = append(rest, arg)
			continue
		}
		// value consumes and returns the argument after the current flag;
		// it advances the loop index, and is fatal when no value follows.
		value := func() string {
			if i+1 >= len(os.Args) {
				fatal("missing value for %s", arg)
				return ""
			}
			i++
			return os.Args[i]
		}
		switch arg {
		case "-db":
			cfg.DBPath = value()
		case "-cf-token":
			cfg.CFToken = value()
		case "-ts-key":
			cfg.TSKey = value()
		case "-ts-authkey":
			cfg.TSAuthKey = value()
		case "-vault-src":
			cfg.VaultSrc = value()
		case "-binary":
			cfg.VaultBinary = value()
		case "-zone":
			cfg.Zone = value()
		case "-cf-zone-id":
			cfg.CFZoneID = value()
		case "-nodes":
			cfg.Nodes = value()
		case "-aws-key":
			cfg.AWSKeyID = value()
		case "-aws-secret":
			cfg.AWSSecretKey = value()
		case "-ishosting-key":
			cfg.ISHostingKey = value()
		case "-ca-dir":
			cfg.CADir = value()
		case "-dry-run", "--dry-run":
			cfg.DryRun = true
		case "-json", "--json":
			cfg.JSONOut = true
		default:
			rest = append(rest, arg)
		}
	}
	// Environment-variable fallbacks for anything not given on the CLI.
	for _, f := range []struct {
		dst *string
		env string
	}{
		{&cfg.CFToken, "CF_API_TOKEN"},
		{&cfg.TSKey, "TS_API_KEY"},
		{&cfg.TSAuthKey, "TS_AUTHKEY"},
		{&cfg.VaultBinary, "VAULT_BINARY"},
		{&cfg.AWSKeyID, "AWS_ACCESS_KEY_ID"},
		{&cfg.AWSSecretKey, "AWS_SECRET_ACCESS_KEY"},
		{&cfg.ISHostingKey, "ISHOSTING_API_KEY"},
	} {
		if *f.dst == "" {
			*f.dst = os.Getenv(f.env)
		}
	}
	// Literal defaults.
	for _, d := range []struct {
		dst *string
		def string
	}{
		{&cfg.DBPath, "../clavitor.com/clavitor.db"},
		{&cfg.VaultSrc, "../clavis/clavis-vault"},
		{&cfg.Zone, "clavitor.ai"},
		{&cfg.CADir, "./ca"},
	} {
		if *d.dst == "" {
			*d.dst = d.def
		}
	}
	return cfg, rest
}
// exitWith terminates the process with exit code 1 as soon as any result
// reports failure; if every result is OK it simply returns, letting main
// finish with exit code 0.
func exitWith(results []NodeResult) {
	for _, res := range results {
		if !res.OK {
			os.Exit(1)
		}
	}
}
// --- Subcommand: sync ---
// cmdSync reconciles Cloudflare DNS, Tailscale, and (when AWS credentials
// are configured) security-group firewalls against the live POPs in the DB.
func cmdSync(cfg Config) []NodeResult {
	requireKeys(cfg, "cf", "ts")
	pops := loadLivePOPs(cfg)
	log(cfg, "DB: %d live POPs with DNS+IP", len(pops))
	// Resolve the Cloudflare zone ID once if not supplied.
	if cfg.CFZoneID == "" {
		zoneID, err := cfResolveZoneID(cfg.CFToken, cfg.Zone)
		if err != nil {
			fatal("resolving zone: %v", err)
		}
		cfg.CFZoneID = zoneID
	}
	var results []NodeResult
	log(cfg, "\n--- Cloudflare DNS ---")
	results = append(results, syncDNS(cfg, pops)...)
	log(cfg, "\n--- Tailscale ---")
	results = append(results, syncTailscale(cfg, pops)...)
	// Firewall reconciliation is optional — only with AWS credentials.
	if cfg.AWSKeyID != "" {
		log(cfg, "\n--- AWS Firewall ---")
		for _, pop := range pops {
			results = append(results, ensureFirewall(cfg, pop))
		}
	}
	outputResults(cfg, results)
	return results
}
// --- Subcommand: deploy ---
// Sequential graceful deploy with mTLS cert push.
// Flow per POP: stop → deploy binary + certs → start → verify
// Auto-selects binary based on POP architecture (aarch64 or x86_64)
// cmdDeploy rolls the vault binary out to every live node, one node at a
// time, stopping the rollout on the first failure. Binaries are selected
// per node architecture (aarch64 default, x86_64 supported).
func cmdDeploy(cfg Config) []NodeResult {
	pops := filterNodes(cfg, loadLivePOPs(cfg))
	if len(pops) == 0 {
		fatal("no live nodes to deploy to")
	}
	// Firewall pass first (skipped unless AWS credentials are configured).
	if cfg.AWSKeyID != "" {
		log(cfg, "--- Firewall ---")
		for _, pop := range pops {
			ensureFirewall(cfg, pop)
		}
	}
	// Resolve one binary per architecture.
	log(cfg, "\n--- Build ---")
	binaries := resolveVaultBinaries(cfg)
	for arch, path := range binaries {
		log(cfg, "Binary %s: %s", arch, path)
	}
	if cfg.DryRun {
		var results []NodeResult
		for _, pop := range pops {
			results = append(results, NodeResult{Node: pop.Subdomain(), Action: "deploy", OK: true, Message: "would deploy (sequential, graceful)"})
		}
		outputResults(cfg, results)
		return results
	}
	// Sequential rollout: abort on the first node that fails.
	log(cfg, "\n--- Deploy to %d nodes (sequential, graceful) ---", len(pops))
	var results []NodeResult
	for _, pop := range pops {
		arch := pop.Arch
		if arch == "" {
			arch = "aarch64" // fleet default
		}
		binPath, found := binaries[arch]
		if !found {
			results = append(results, NodeResult{Node: pop.Subdomain(), Action: "deploy", Error: fmt.Sprintf("no binary for arch %s", arch)})
			log(cfg, "\n [%s] DEPLOY FAILED - no binary for arch %s", pop.Subdomain(), arch)
			break
		}
		res := deployToNode(cfg, pop, binPath)
		results = append(results, res)
		if !res.OK {
			log(cfg, "\n [%s] DEPLOY FAILED - stopping rollout", pop.Subdomain())
			break
		}
	}
	outputResults(cfg, results)
	return results
}
// deployToNode performs a graceful stop → binary/cert upload → install →
// start → verify cycle for a single POP over Tailscale SSH.
//
// Stop, upload, install, start, and final-health failures abort the deploy
// and are reported in the returned NodeResult; mTLS certificate problems
// are logged as warnings only and do not fail the deploy.
func deployToNode(cfg Config, p POP, binaryPath string) NodeResult {
	name := p.Subdomain()
	r := NodeResult{Node: name, Action: "deploy"}
	log(cfg, "\n [%s] --- Deploying ---", name)
	// Step 1: Graceful stop. "|| true" so a not-yet-installed unit is OK.
	log(cfg, " [%s] Stopping service (graceful)...", name)
	if out, err := tsSshExec(name, "systemctl stop clavitor 2>/dev/null || true"); err != nil {
		log(cfg, " [%s] Warning: stop command failed: %v", name, err)
		_ = out
	}
	// Poll up to 15s until systemd no longer reports the unit "active".
	stopped := false
	for i := 0; i < 15; i++ {
		out, _ := tsSshExec(name, "systemctl is-active clavitor 2>&1 || true")
		if strings.TrimSpace(out) != "active" {
			stopped = true
			break
		}
		time.Sleep(1 * time.Second)
	}
	if !stopped {
		r.Error = "service did not stop within 15s"
		return r
	}
	log(cfg, " [%s] Service stopped gracefully", name)
	// Step 2: Upload binary to a staging path; Step 4 moves it into place.
	log(cfg, " [%s] Uploading binary...", name)
	if err := scpToNode(name, binaryPath, "/tmp/clavitor-new"); err != nil {
		r.Error = fmt.Sprintf("scp binary: %v", err)
		return r
	}
	// Step 3: Generate and upload mTLS certs (best-effort: warnings only).
	certDir, err := ensurePOPCerts(cfg, p)
	if err != nil {
		log(cfg, " [%s] Warning: cert generation failed: %v", name, err)
	} else if certDir != "" {
		log(cfg, " [%s] Uploading mTLS certs from %s...", name, certDir)
		keyPath := filepath.Join(certDir, name+".key")
		certPath := filepath.Join(certDir, name+".crt")
		chainPath := filepath.Join(certDir, "ca-chain.crt")
		if err := scpToNode(name, keyPath, "/tmp/"+name+".key"); err != nil {
			log(cfg, " [%s] Warning: failed to upload key: %v", name, err)
		} else if err := scpToNode(name, certPath, "/tmp/"+name+".crt"); err != nil {
			log(cfg, " [%s] Warning: failed to upload cert: %v", name, err)
		} else if err := scpToNode(name, chainPath, "/tmp/ca-chain.crt"); err != nil {
			log(cfg, " [%s] Warning: failed to upload chain: %v", name, err)
		} else {
			// Move certs into place and lock down the private key.
			moveCmd := fmt.Sprintf("mv /tmp/%s.key /tmp/%s.crt /tmp/ca-chain.crt /opt/clavitor/certs/ && chmod 600 /opt/clavitor/certs/%s.key", name, name, name)
			if _, err := tsSshExec(name, moveCmd); err != nil {
				log(cfg, " [%s] Warning: failed to move certs: %v", name, err)
			} else {
				log(cfg, " [%s] mTLS certs deployed", name)
			}
		}
	}
	// Step 4: Install systemd service and binary. The heredoc is executed
	// on the remote node; only %s (the POP DNS name) is interpolated.
	// NOTE(review): the env file below embeds literal CF_API_TOKEN and
	// TELEMETRY_TOKEN values — secrets committed in source and pushed to
	// every node. They should be injected from cfg/environment instead, and
	// the exposed values rotated.
	installScript := fmt.Sprintf(`set -e
mkdir -p /opt/clavitor/bin /opt/clavitor/data /opt/clavitor/certs
mv /tmp/clavitor-new /opt/clavitor/bin/clavitor
chmod +x /opt/clavitor/bin/clavitor
cat > /opt/clavitor/env << 'ENVEOF'
PORT=1984
VAULT_MODE=hosted
DATA_DIR=/opt/clavitor/data
TELEMETRY_FREQ=30
TELEMETRY_HOST=https://clavitor.ai/telemetry
TELEMETRY_TOKEN=clavitor-fleet-2026
TLS_DOMAIN=%s
CF_API_TOKEN=dSVz7JZtyK023q7kh4MMNmIggK1dahWdnBxVnP3O
TLS_CERT_DIR=/opt/clavitor/certs
TLS_EMAIL=ops@clavitor.ai
ENVEOF
cat > /etc/systemd/system/clavitor.service << 'UNITEOF'
[Unit]
Description=Clavitor Vault
After=network-online.target
Wants=network-online.target
[Service]
Type=simple
ExecStart=/opt/clavitor/bin/clavitor
EnvironmentFile=/opt/clavitor/env
WorkingDirectory=/opt/clavitor/data
User=root
# Graceful shutdown: SIGTERM triggers 5s graceful exit
KillSignal=SIGTERM
TimeoutStopSec=10
# Clean exit is success - stay down until explicitly started
SuccessExitStatus=0
Restart=no
[Install]
WantedBy=multi-user.target
UNITEOF
systemctl daemon-reload
systemctl enable clavitor`, p.DNS)
	out, err := tsSshExec(name, installScript)
	if err != nil {
		r.Error = fmt.Sprintf("install: %v\n%s", err, out)
		log(cfg, " [%s] FAIL: install error: %v", name, err)
		return r
	}
	// Step 5: Start the freshly installed service.
	log(cfg, " [%s] Starting service...", name)
	if out, err := tsSshExec(name, "systemctl start clavitor"); err != nil {
		r.Error = fmt.Sprintf("start: %v\n%s", err, out)
		log(cfg, " [%s] FAIL: start error", name)
		return r
	}
	// Step 6: Verify health via systemctl only (poll up to 15s) — a local
	// HTTPS probe would fail because the TLS cert does not match localhost.
	log(cfg, " [%s] Verifying health...", name)
	healthy := false
	for i := 0; i < 15; i++ {
		out, _ := tsSshExec(name, "systemctl is-active clavitor 2>&1 || true")
		if strings.TrimSpace(out) == "active" {
			healthy = true
			break
		}
		time.Sleep(1 * time.Second)
	}
	if !healthy {
		r.Error = "service failed to become active"
		log(cfg, " [%s] FAIL: service not active", name)
		return r
	}
	log(cfg, " [%s] OK - Deployed and healthy", name)
	r.OK = true
	r.Message = "deployed gracefully"
	return r
}
// resolveVaultBinary returns a deployable vault binary path: the pre-built
// binary named by cfg.VaultBinary when set, otherwise a fresh linux/arm64
// cross-compile from cfg.VaultSrc. Fatal on a missing binary or build error.
func resolveVaultBinary(cfg Config) string {
	if cfg.VaultBinary != "" {
		if _, err := os.Stat(cfg.VaultBinary); err != nil {
			fatal("vault binary not found: %s", cfg.VaultBinary)
		}
		return cfg.VaultBinary
	}
	// No pre-built binary: cross-compile from source.
	srcDir, _ := filepath.Abs(cfg.VaultSrc)
	target := filepath.Join(srcDir, "clavitor-linux-arm64")
	build := exec.Command("go", "build", "-o", target, "./cmd/clavitor")
	build.Dir = srcDir
	build.Env = append(os.Environ(), "GOOS=linux", "GOARCH=arm64", "CGO_ENABLED=0")
	if out, err := build.CombinedOutput(); err != nil {
		fatal("build failed: %v\n%s", err, string(out))
	}
	return target
}
// resolveVaultBinaries returns a map of architecture -> binary path
// ("aarch64" and/or "x86_64"), either from the explicit -binary flag or by
// probing standard build-output locations. Fatal when no aarch64 binary
// (the fleet default) can be found.
func resolveVaultBinaries(cfg Config) map[string]string {
	binaries := make(map[string]string)
	// If specific binary provided, use for all (legacy behavior)
	if cfg.VaultBinary != "" {
		if _, err := os.Stat(cfg.VaultBinary); err != nil {
			fatal("vault binary not found: %s", cfg.VaultBinary)
		}
		// Detect arch from filename or assume aarch64
		arch := "aarch64"
		if strings.Contains(cfg.VaultBinary, "amd64") || strings.Contains(cfg.VaultBinary, "x86_64") {
			arch = "x86_64"
		}
		binaries[arch] = cfg.VaultBinary
		// Also create alias for default
		// NOTE(review): when the provided binary was detected as x86_64 this
		// also registers it as the aarch64 default, so ARM nodes (or nodes
		// with an empty arch column) would receive a wrong-architecture
		// binary — confirm this "legacy" aliasing is intended.
		if _, ok := binaries["aarch64"]; !ok {
			binaries["aarch64"] = cfg.VaultBinary
		}
		return binaries
	}
	// Auto-detect binaries in standard locations; first match per arch wins
	// only in the sense that later matches overwrite earlier ones.
	locations := []struct {
		path string
		arch string
	}{
		{"/opt/clavitor.ai/clavitor-linux-arm64", "aarch64"},
		{"/opt/clavitor.ai/clavitor-linux-amd64", "x86_64"},
		{"./clavitor-linux-arm64", "aarch64"},
		{"./clavitor-linux-amd64", "x86_64"},
	}
	for _, loc := range locations {
		if _, err := os.Stat(loc.path); err == nil {
			binaries[loc.arch] = loc.path
			log(cfg, "Found %s binary: %s", loc.arch, loc.path)
		}
	}
	// Ensure we have at least aarch64 (default)
	if _, ok := binaries["aarch64"]; !ok {
		fatal("No binary found. Provide -binary or place clavitor-linux-arm64 in /opt/clavitor.ai/ or current directory")
	}
	return binaries
}
// ensurePOPCerts returns a directory containing <name>.key, <name>.crt and
// ca-chain.crt for the POP, generating them (and the root/intermediate CA
// itself, on first use) with the openssl CLI when they do not already
// exist. Existing key+cert pairs are reused as-is.
//
// OpenSSL is shelled out to (rather than Go crypto) per the original
// rationale: it avoids Go crypto/rand entropy blocking on fresh VMs.
func ensurePOPCerts(cfg Config, p POP) (string, error) {
	name := p.Subdomain()
	certDir := filepath.Join(cfg.CADir, name)
	keyPath := filepath.Join(certDir, name+".key")
	certPath := filepath.Join(certDir, name+".crt")
	chainPath := filepath.Join(certDir, "ca-chain.crt")
	// Reuse existing certs when both key and cert are present.
	if _, err := os.Stat(keyPath); err == nil {
		if _, err := os.Stat(certPath); err == nil {
			return certDir, nil
		}
	}
	log(cfg, " [%s] Generating mTLS certificates...", name)
	// BUGFIX: the output directories were never created, so every openssl
	// invocation below failed on a fresh checkout. MkdirAll on the per-POP
	// dir also creates cfg.CADir itself. 0700: the tree holds private keys.
	if err := os.MkdirAll(certDir, 0700); err != nil {
		return "", fmt.Errorf("create cert dir %s: %w", certDir, err)
	}
	rootKeyPath := filepath.Join(cfg.CADir, "root-ca.key")
	rootCertPath := filepath.Join(cfg.CADir, "root-ca.crt")
	intKeyPath := filepath.Join(cfg.CADir, "intermediate-ca.key")
	intCertPath := filepath.Join(cfg.CADir, "intermediate-ca.crt")
	// One-time CA bootstrap: root key+cert, intermediate key, CSR, and the
	// intermediate cert signed by the root.
	if _, err := os.Stat(rootKeyPath); os.IsNotExist(err) {
		log(cfg, " [%s] Initializing CA with OpenSSL...", name)
		intCSRPath := filepath.Join(cfg.CADir, "intermediate-ca.csr")
		steps := []struct {
			what string
			args []string
		}{
			{"generate root key", []string{"ecparam", "-name", "prime256v1", "-genkey", "-noout", "-out", rootKeyPath}},
			{"generate root cert", []string{"req", "-new", "-x509", "-key", rootKeyPath, "-out", rootCertPath, "-days", "3650", "-subj", "/C=US/O=Clavitor/OU=Infrastructure/CN=Clavitor Root CA"}},
			{"generate intermediate key", []string{"ecparam", "-name", "prime256v1", "-genkey", "-noout", "-out", intKeyPath}},
			{"generate intermediate csr", []string{"req", "-new", "-key", intKeyPath, "-out", intCSRPath, "-subj", "/C=US/O=Clavitor/OU=POP Infrastructure/CN=Clavitor Intermediate CA"}},
			{"sign intermediate cert", []string{"x509", "-req", "-in", intCSRPath, "-CA", rootCertPath, "-CAkey", rootKeyPath, "-CAcreateserial", "-out", intCertPath, "-days", "730"}},
		}
		for _, s := range steps {
			if output, err := exec.Command("openssl", s.args...).CombinedOutput(); err != nil {
				return "", fmt.Errorf("%s: %w\n%s", s.what, err, output)
			}
		}
		os.Remove(intCSRPath)
		// Keys private, certs world-readable.
		os.Chmod(rootKeyPath, 0600)
		os.Chmod(intKeyPath, 0600)
		os.Chmod(rootCertPath, 0644)
		os.Chmod(intCertPath, 0644)
	}
	// Leaf: generate key + CSR in one step, then sign with the intermediate.
	csrPath := filepath.Join(certDir, name+".csr")
	if output, err := exec.Command("openssl", "req", "-newkey", "ec", "-pkeyopt", "ec_paramgen_curve:prime256v1",
		"-keyout", keyPath, "-nodes", "-out", csrPath,
		"-subj", fmt.Sprintf("/O=Clavitor/OU=POP/CN=%s", name)).CombinedOutput(); err != nil {
		return "", fmt.Errorf("openssl key gen: %w\n%s", err, output)
	}
	if output, err := exec.Command("openssl", "x509", "-req", "-in", csrPath,
		"-CA", intCertPath, "-CAkey", intKeyPath, "-CAcreateserial",
		"-out", certPath, "-days", "90", "-sha256").CombinedOutput(); err != nil {
		return "", fmt.Errorf("openssl sign: %w\n%s", err, output)
	}
	os.Chmod(keyPath, 0600)
	os.Chmod(certPath, 0644)
	// Build the shared CA chain (intermediate + root) once, then copy it
	// into this POP's cert dir.
	// BUGFIX: read/write errors here were previously ignored, which could
	// silently produce an empty or missing chain file.
	caChainSrc := filepath.Join(cfg.CADir, "ca-chain.crt")
	if _, err := os.Stat(caChainSrc); os.IsNotExist(err) {
		rootPEM, err := os.ReadFile(rootCertPath)
		if err != nil {
			return "", fmt.Errorf("read root cert: %w", err)
		}
		intPEM, err := os.ReadFile(intCertPath)
		if err != nil {
			return "", fmt.Errorf("read intermediate cert: %w", err)
		}
		chain := make([]byte, 0, len(intPEM)+1+len(rootPEM))
		chain = append(chain, intPEM...)
		chain = append(chain, '\n')
		chain = append(chain, rootPEM...)
		if err := os.WriteFile(caChainSrc, chain, 0644); err != nil {
			return "", fmt.Errorf("write ca chain: %w", err)
		}
	}
	chainData, err := os.ReadFile(caChainSrc)
	if err != nil {
		return "", fmt.Errorf("read ca chain: %w", err)
	}
	if err := os.WriteFile(chainPath, chainData, 0644); err != nil {
		return "", fmt.Errorf("write ca chain copy: %w", err)
	}
	os.Remove(csrPath)
	log(cfg, " [%s] Certificates generated in %s", name, certDir)
	return certDir, nil
}
// --- Subcommand: status ---
// cmdStatus health-checks every (filtered) live node, four at a time:
// service state, hostname, uptime, and Tailscale IP.
func cmdStatus(cfg Config) []NodeResult {
	pops := filterNodes(cfg, loadLivePOPs(cfg))
	log(cfg, "Checking %d nodes...\n", len(pops))
	const probe = "echo SERVICE=$(systemctl is-active clavitor 2>/dev/null || echo stopped); echo HOST=$(hostname); uptime -p 2>/dev/null; echo TS=$(tailscale ip -4 2>/dev/null || echo none)"
	results := parallelExec(pops, 4, func(p POP) NodeResult {
		node := p.Subdomain()
		res := NodeResult{Node: node, Action: "status"}
		out, err := nodeExec(cfg, p, probe)
		if err != nil {
			res.Error = fmt.Sprintf("unreachable: %v", err)
			log(cfg, " FAIL %s — %v", node, err)
			return res
		}
		res.OK = true
		res.Message = strings.TrimSpace(out)
		log(cfg, " OK %s — %s", node, oneLineStatus(out))
		return res
	})
	outputResults(cfg, results)
	return results
}
// --- Subcommand: exec ---
// cmdExec runs an arbitrary shell command on every (filtered) live node,
// four at a time, reporting each node's output or failure.
func cmdExec(cfg Config, command string) []NodeResult {
	pops := filterNodes(cfg, loadLivePOPs(cfg))
	log(cfg, "Running on %d nodes: %s\n", len(pops), command)
	results := parallelExec(pops, 4, func(p POP) NodeResult {
		node := p.Subdomain()
		res := NodeResult{Node: node, Action: "exec"}
		out, err := nodeExec(cfg, p, command)
		if err != nil {
			res.Error = fmt.Sprintf("%v\n%s", err, out)
			log(cfg, "--- %s (FAIL) ---\n%s\n%v", node, out, err)
			return res
		}
		res.OK = true
		res.Message = strings.TrimSpace(out)
		log(cfg, "--- %s ---\n%s", node, res.Message)
		return res
	})
	outputResults(cfg, results)
	return results
}
// --- Subcommand: update ---
// Runs system updates on all live nodes.
// cmdUpdate runs OS package updates on every (filtered) live node, four at
// a time. The remote command tries apt, then yum, then dnf, best-effort.
func cmdUpdate(cfg Config) []NodeResult {
	pops := filterNodes(cfg, loadLivePOPs(cfg))
	log(cfg, "Updating %d nodes...\n", len(pops))
	updateCmd := `export DEBIAN_FRONTEND=noninteractive; ` +
		`(apt-get update -qq && apt-get upgrade -y -qq && apt-get install -y -qq unattended-upgrades 2>/dev/null) || ` +
		`(yum update -y -q 2>/dev/null) || ` +
		`(dnf update -y -q 2>/dev/null) || true; ` +
		`echo "update done"`
	results := parallelExec(pops, 4, func(p POP) NodeResult {
		node := p.Subdomain()
		res := NodeResult{Node: node, Action: "update"}
		log(cfg, " [%s] updating...", node)
		// Output is intentionally discarded; success/failure is enough.
		if _, err := tsSshExec(node, updateCmd); err != nil {
			res.Error = fmt.Sprintf("%v", err)
			log(cfg, " [%s] FAIL: %v", node, err)
			return res
		}
		res.OK = true
		res.Message = "updated"
		log(cfg, " [%s] done", node)
		return res
	})
	outputResults(cfg, results)
	return results
}
// --- Subcommand: maintenance ---
// cmdMaintenance toggles or reports the fleet maintenance window via the
// NOC API: "on"/"start" opens a window, "off"/"stop" closes it, and
// "status" prints the current state plus historical windows.
//
// NOTE(review): the API PIN is hardcoded in the endpoint URL — consider
// sourcing it from the environment and rotating the exposed value.
func cmdMaintenance(cfg Config, args []string) {
	action := args[0]
	reason := ""
	if len(args) > 1 {
		reason = strings.Join(args[1:], " ")
	}
	// Attribute the change to the local user when known.
	by := "pop-sync"
	if user := os.Getenv("USER"); user != "" {
		by = "pop-sync/" + user
	}
	const endpoint = "https://clavitor.ai/noc/api/maintenance?pin=250365"
	// post sends a start/stop action; the on/off branches previously
	// duplicated this code verbatim.
	post := func(act string) {
		body, _ := json.Marshal(map[string]string{"action": act, "reason": reason, "by": by})
		req, _ := http.NewRequest("POST", endpoint, bytes.NewReader(body))
		req.Header.Set("Content-Type", "application/json")
		resp, err := http.DefaultClient.Do(req)
		if err != nil {
			fatal("maintenance %s: %v", act, err)
		}
		defer resp.Body.Close()
		io.Copy(io.Discard, resp.Body) // drain so the connection can be reused
	}
	switch action {
	case "on", "start":
		post("start")
		log(cfg, "Maintenance ON: %s (by %s)", reason, by)
	case "off", "stop":
		post("stop")
		log(cfg, "Maintenance OFF (by %s)", by)
	case "status":
		resp, err := http.Get(endpoint)
		if err != nil {
			fatal("maintenance status: %v", err)
		}
		defer resp.Body.Close()
		data, err := io.ReadAll(resp.Body)
		if err != nil {
			fatal("maintenance status: read: %v", err)
		}
		if cfg.JSONOut {
			fmt.Println(string(data))
			return
		}
		var result struct {
			Active  bool `json:"active"`
			Windows []struct {
				StartAt int64  `json:"start_at"`
				EndAt   *int64 `json:"end_at"`
				Reason  string `json:"reason"`
				StartBy string `json:"started_by"`
				EndBy   string `json:"ended_by"`
			} `json:"windows"`
		}
		// BUGFIX: the unmarshal error was previously ignored, so a
		// malformed response silently printed "No active maintenance".
		if err := json.Unmarshal(data, &result); err != nil {
			fatal("maintenance status: parse: %v", err)
		}
		if result.Active {
			fmt.Println("MAINTENANCE ACTIVE")
		} else {
			fmt.Println("No active maintenance")
		}
		for _, w := range result.Windows {
			end := "ongoing"
			if w.EndAt != nil {
				end = time.Unix(*w.EndAt, 0).Format("2006-01-02 15:04:05")
			}
			fmt.Printf(" %s — %s %q by=%s\n",
				time.Unix(w.StartAt, 0).Format("2006-01-02 15:04:05"),
				end, w.Reason, w.StartBy)
		}
	default:
		fatal("usage: pop-sync maintenance <on|off|status> [reason]")
	}
}
// --- Subcommand: firewall ---
// Ensures every POP's security group has exactly port 1984 open, nothing else.
// cmdFirewall reconciles each (filtered) live node's security group so that
// only inbound TCP 1984 is allowed. Requires AWS credentials.
func cmdFirewall(cfg Config) []NodeResult {
	if cfg.AWSKeyID == "" || cfg.AWSSecretKey == "" {
		fatal("AWS credentials required: set AWS_ACCESS_KEY_ID/AWS_SECRET_ACCESS_KEY or -aws-key/-aws-secret")
	}
	pops := filterNodes(cfg, loadLivePOPs(cfg))
	log(cfg, "Checking firewall for %d nodes...\n", len(pops))
	var results []NodeResult
	for _, pop := range pops {
		results = append(results, ensureFirewall(cfg, pop))
	}
	outputResults(cfg, results)
	return results
}
// ensureFirewall makes sure the security group for a POP only allows inbound TCP 1984.
// ensureFirewall makes sure the security group for a POP only allows inbound TCP 1984.
//
// It looks up the instance's first security group, revokes every ingress
// rule that is not exactly tcp/1984, and authorizes tcp/1984 from
// 0.0.0.0/0 if missing. With -dry-run, intended changes are logged but not
// applied. Returns OK=false only for AWS errors or a missing
// instance/security group.
func ensureFirewall(cfg Config, pop POP) NodeResult {
	name := pop.Subdomain()
	region := pop.RegionName
	r := NodeResult{Node: name, Action: "firewall"}
	if pop.InstanceID == "" {
		r.Error = "no instance_id"
		return r
	}
	ctx := context.Background()
	// Fresh per-call client with static credentials pinned to the POP's region.
	awsCfg, err := awsconfig.LoadDefaultConfig(ctx,
		awsconfig.WithRegion(region),
		awsconfig.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(cfg.AWSKeyID, cfg.AWSSecretKey, "")),
	)
	if err != nil {
		r.Error = fmt.Sprintf("aws config: %v", err)
		return r
	}
	client := ec2.NewFromConfig(awsCfg)
	// Get instance's security group
	descOut, err := client.DescribeInstances(ctx, &ec2.DescribeInstancesInput{
		InstanceIds: []string{pop.InstanceID},
	})
	if err != nil {
		r.Error = fmt.Sprintf("describe instance: %v", err)
		return r
	}
	if len(descOut.Reservations) == 0 || len(descOut.Reservations[0].Instances) == 0 {
		r.Error = "instance not found"
		return r
	}
	inst := descOut.Reservations[0].Instances[0]
	if len(inst.SecurityGroups) == 0 {
		r.Error = "no security groups"
		return r
	}
	// NOTE(review): only the first attached security group is reconciled;
	// additional groups, if any, are ignored — confirm each instance has
	// exactly one.
	sgID := *inst.SecurityGroups[0].GroupId
	// Get current rules
	sgOut, err := client.DescribeSecurityGroups(ctx, &ec2.DescribeSecurityGroupsInput{
		GroupIds: []string{sgID},
	})
	if err != nil {
		r.Error = fmt.Sprintf("describe sg: %v", err)
		return r
	}
	sg := sgOut.SecurityGroups[0]
	// Partition the ingress rules: exactly tcp/1984 is kept, everything
	// else is queued for revocation.
	has1984 := false
	var toRevoke []ec2types.IpPermission
	for _, perm := range sg.IpPermissions {
		if perm.FromPort != nil && *perm.FromPort == 1984 && perm.ToPort != nil && *perm.ToPort == 1984 && perm.IpProtocol != nil && *perm.IpProtocol == "tcp" {
			has1984 = true
		} else {
			toRevoke = append(toRevoke, perm)
		}
	}
	changes := 0
	// Remove unwanted rules (single batched revoke call).
	if len(toRevoke) > 0 && !cfg.DryRun {
		_, err := client.RevokeSecurityGroupIngress(ctx, &ec2.RevokeSecurityGroupIngressInput{
			GroupId:       &sgID,
			IpPermissions: toRevoke,
		})
		if err != nil {
			r.Error = fmt.Sprintf("revoke rules: %v", err)
			return r
		}
		for _, p := range toRevoke {
			// FromPort may be nil (e.g. all-traffic rules); log 0 then.
			port := int32(0)
			if p.FromPort != nil {
				port = *p.FromPort
			}
			log(cfg, " [%s] removed port %d from %s", name, port, sgID)
		}
		changes += len(toRevoke)
	} else if len(toRevoke) > 0 {
		// Dry run: report what would be revoked without touching AWS.
		for _, p := range toRevoke {
			port := int32(0)
			if p.FromPort != nil {
				port = *p.FromPort
			}
			log(cfg, " [%s] would remove port %d from %s", name, port, sgID)
		}
		changes += len(toRevoke)
	}
	// Add 1984 if missing
	if !has1984 && !cfg.DryRun {
		proto := "tcp"
		port := int32(1984)
		_, err := client.AuthorizeSecurityGroupIngress(ctx, &ec2.AuthorizeSecurityGroupIngressInput{
			GroupId: &sgID,
			IpPermissions: []ec2types.IpPermission{{
				IpProtocol: &proto,
				FromPort:   &port,
				ToPort:     &port,
				IpRanges:   []ec2types.IpRange{{CidrIp: strPtr("0.0.0.0/0")}},
			}},
		})
		if err != nil {
			r.Error = fmt.Sprintf("add 1984: %v", err)
			return r
		}
		log(cfg, " [%s] opened port 1984 on %s", name, sgID)
		changes++
	} else if !has1984 {
		log(cfg, " [%s] would open port 1984 on %s", name, sgID)
		changes++
	}
	if changes == 0 {
		log(cfg, " [%s] OK — port 1984 only (%s)", name, sgID)
	}
	r.OK = true
	r.Message = fmt.Sprintf("sg=%s port=1984", sgID)
	return r
}
// strPtr returns a pointer to a copy of s (AWS SDK inputs want *string).
func strPtr(s string) *string {
	v := s
	return &v
}

// int32Ptr returns a pointer to a copy of i (AWS SDK inputs want *int32).
func int32Ptr(i int32) *int32 {
	v := i
	return &v
}
// --- Subcommand: provision ---
// Spins up t4g.nano EC2 instances for planned POPs.
// cmdProvision creates cloud instances for the named cities. Each city must
// match a POP in "planned" status (case-insensitive); unmatched cities are
// skipped with a warning. Dispatches per provider (aws, ishosting).
func cmdProvision(cfg Config, cities []string) []NodeResult {
	allPOPs, err := readPOPs(cfg.DBPath)
	if err != nil {
		fatal("reading DB: %v", err)
	}
	// Resolve each requested city to its planned POP row.
	var targets []POP
	for _, city := range cities {
		matched := false
		for _, p := range allPOPs {
			if p.Status == "planned" && strings.EqualFold(p.City, city) {
				targets = append(targets, p)
				matched = true
				break
			}
		}
		if !matched {
			log(cfg, "WARNING: %q not found or not planned, skipping", city)
		}
	}
	if len(targets) == 0 {
		fatal("no matching planned POPs found")
	}
	log(cfg, "Provisioning %d nodes...\n", len(targets))
	var results []NodeResult
	for _, p := range targets {
		var res NodeResult
		switch strings.ToLower(p.Provider) {
		case "ishosting":
			res = provisionISHosting(cfg, p)
		case "aws":
			res = provisionNode(cfg, p)
		default:
			res = NodeResult{Node: p.City, Action: "provision", Error: fmt.Sprintf("unknown provider: %s — use bootstrap for manual VPS", p.Provider)}
		}
		results = append(results, res)
	}
	outputResults(cfg, results)
	return results
}
// provisionNode provisions one AWS-backed POP end to end: launch a
// t4g.nano (arm64 Amazon Linux 2023), record instance/IP/DNS in the local
// sqlite DB, create the Cloudflare A record, bootstrap Tailscale via SSM,
// deploy the vault binary + systemd unit over Tailscale SSH, and finish
// with a best-effort HTTPS /ping probe.
// Returns a NodeResult whose Error is set at the first fatal step; DNS,
// Tailscale-join, and TLS-probe problems are reported as warnings only.
func provisionNode(cfg Config, pop POP) NodeResult {
	region := pop.RegionName
	r := NodeResult{Node: pop.City, Action: "provision"}
	ctx := context.Background()
	awsCfg, err := awsconfig.LoadDefaultConfig(ctx,
		awsconfig.WithRegion(region),
		awsconfig.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(cfg.AWSKeyID, cfg.AWSSecretKey, "")),
	)
	if err != nil {
		r.Error = fmt.Sprintf("aws config: %v", err)
		return r
	}
	ec2Client := ec2.NewFromConfig(awsCfg)
	ssmClient := ssm.NewFromConfig(awsCfg)
	// Auto-generate DNS subdomain if not set: lowercase country code plus
	// the first unused ordinal (e.g. de1, de2, ...).
	sub := pop.Subdomain()
	if sub == "" {
		prefix := strings.ToLower(pop.Country)
		// Find next available ordinal
		// NOTE(review): the readPOPs error is ignored; on a read failure
		// allPOPs is empty and the chosen ordinal may collide with an
		// existing subdomain.
		allPOPs, _ := readPOPs(cfg.DBPath)
		ordinal := 1
		for {
			candidate := fmt.Sprintf("%s%d", prefix, ordinal)
			taken := false
			for _, p := range allPOPs {
				if p.Subdomain() == candidate {
					taken = true
					break
				}
			}
			if !taken {
				sub = candidate
				break
			}
			ordinal++
		}
		// Persist to DB immediately (best effort — open/exec errors ignored)
		dns := sub + "." + cfg.Zone
		pop.DNS = dns
		localDB, _ := sql.Open("sqlite", cfg.DBPath)
		localDB.Exec(`UPDATE pops SET dns=? WHERE pop_id=?`, dns, pop.PopID)
		localDB.Close()
		log(cfg, " [%s] auto-assigned DNS: %s", pop.City, dns)
	}
	dns := sub + "." + cfg.Zone
	if cfg.DryRun {
		r.OK = true
		r.Message = fmt.Sprintf("would provision %s (%s) as %s", pop.City, region, dns)
		log(cfg, " [%s] DRY RUN: %s", pop.City, r.Message)
		return r
	}
	// --- Step 1: Launch EC2 ---
	log(cfg, " [%s] looking up AMI in %s...", pop.City, region)
	// Latest AL2023 arm64 AMI resolved via the public SSM parameter.
	amiParam, err := ssmClient.GetParameter(ctx, &ssm.GetParameterInput{
		Name: strPtr("/aws/service/ami-amazon-linux-latest/al2023-ami-kernel-default-arm64"),
	})
	if err != nil {
		r.Error = fmt.Sprintf("AMI lookup: %v", err)
		return r
	}
	amiID := *amiParam.Parameter.Value
	sgID, err := ensureSecurityGroup(ctx, ec2Client, region, pop.City, cfg)
	if err != nil {
		r.Error = fmt.Sprintf("security group: %v", err)
		return r
	}
	log(cfg, " [%s] launching t4g.nano (AMI: %s, SG: %s)...", pop.City, amiID, sgID)
	runOut, err := ec2Client.RunInstances(ctx, &ec2.RunInstancesInput{
		ImageId:          &amiID,
		InstanceType:     ec2types.InstanceTypeT4gNano,
		MinCount:         int32Ptr(1),
		MaxCount:         int32Ptr(1),
		SecurityGroupIds: []string{sgID},
		IamInstanceProfile: &ec2types.IamInstanceProfileSpecification{
			Name: strPtr("clavitor-ssm-profile"), // NOTE: existing AWS IAM profile may still be named vault1984-ssm-profile — rename in AWS console to match
		},
		TagSpecifications: []ec2types.TagSpecification{{
			ResourceType: ec2types.ResourceTypeInstance,
			Tags: []ec2types.Tag{
				{Key: strPtr("Name"), Value: strPtr("clavitor-" + sub)},
				{Key: strPtr("clavitor-pop"), Value: strPtr(sub)},
			},
		}},
	})
	if err != nil {
		r.Error = fmt.Sprintf("launch: %v", err)
		return r
	}
	instanceID := *runOut.Instances[0].InstanceId
	log(cfg, " [%s] instance: %s — waiting for IP...", pop.City, instanceID)
	waiter := ec2.NewInstanceRunningWaiter(ec2Client)
	if err := waiter.Wait(ctx, &ec2.DescribeInstancesInput{InstanceIds: []string{instanceID}}, 3*time.Minute); err != nil {
		r.Error = fmt.Sprintf("wait: %v", err)
		return r
	}
	descOut, err := ec2Client.DescribeInstances(ctx, &ec2.DescribeInstancesInput{InstanceIds: []string{instanceID}})
	// NOTE(review): when err is nil but the reservation list is empty this
	// reports "describe: <nil>".
	if err != nil || len(descOut.Reservations) == 0 || len(descOut.Reservations[0].Instances) == 0 {
		r.Error = fmt.Sprintf("describe: %v", err)
		return r
	}
	publicIP := ""
	if descOut.Reservations[0].Instances[0].PublicIpAddress != nil {
		publicIP = *descOut.Reservations[0].Instances[0].PublicIpAddress
	}
	if publicIP == "" {
		r.Error = "no public IP"
		return r
	}
	log(cfg, " [%s] IP: %s", pop.City, publicIP)
	// --- Step 2: Update local DB ---
	log(cfg, " [%s] updating DB...", pop.City)
	localDB, err := sql.Open("sqlite", cfg.DBPath)
	if err != nil {
		r.Error = fmt.Sprintf("open db: %v", err)
		return r
	}
	// Exec error ignored — best-effort status write.
	localDB.Exec(`UPDATE pops SET instance_id=?, ip=?, dns=?, status='live' WHERE pop_id=?`,
		instanceID, publicIP, dns, pop.PopID)
	localDB.Close()
	// Reload pop with updated fields
	pop.InstanceID = instanceID
	pop.IP = publicIP
	pop.DNS = dns
	pop.Status = "live"
	// --- Step 3: DNS (best effort; failures are silently ignored) ---
	log(cfg, " [%s] creating DNS %s → %s...", pop.City, dns, publicIP)
	if cfg.CFToken != "" {
		zoneID := cfg.CFZoneID
		if zoneID == "" {
			zoneID, _ = cfResolveZoneID(cfg.CFToken, cfg.Zone)
		}
		if zoneID != "" {
			cfCreateRecord(cfg.CFToken, zoneID, dns, publicIP)
		}
	}
	// --- Step 4: Wait for SSM agent, then bootstrap Tailscale ---
	// NOTE(review): the log says "up to 90s" but the wait is a fixed 30s sleep.
	log(cfg, " [%s] waiting for SSM agent (up to 90s)...", pop.City)
	time.Sleep(30 * time.Second) // SSM agent takes ~30s to register after boot
	if cfg.TSAuthKey != "" {
		if err := ssmBootstrapTailscale(cfg, pop); err != nil {
			log(cfg, " [%s] tailscale warning: %v", pop.City, err)
		}
	}
	// --- Step 5: Deploy vault binary via Tailscale SCP + SSH ---
	log(cfg, " [%s] deploying vault...", pop.City)
	binaryPath := resolveVaultBinary(cfg)
	if err := scpToNode(sub, binaryPath, "/tmp/clavitor-new"); err != nil {
		r.Error = fmt.Sprintf("scp binary: %v", err)
		return r
	}
	// Install script: writes the env file and systemd unit, restarts the
	// service, and echoes its active state as the last output line.
	// NOTE(review): the env file embeds a hard-coded CF_API_TOKEN and
	// telemetry token — consider sourcing these from config/secrets.
	installScript := fmt.Sprintf(`set -e
mkdir -p /opt/clavitor/bin /opt/clavitor/data /opt/clavitor/certs
mv /tmp/clavitor-new /opt/clavitor/bin/clavitor
chmod +x /opt/clavitor/bin/clavitor
cat > /opt/clavitor/env << 'ENVEOF'
PORT=1984
VAULT_MODE=hosted
DATA_DIR=/opt/clavitor/data
TELEMETRY_FREQ=30
TELEMETRY_HOST=https://clavitor.ai/telemetry
TELEMETRY_TOKEN=clavitor-fleet-2026
TLS_DOMAIN=%s
CF_API_TOKEN=dSVz7JZtyK023q7kh4MMNmIggK1dahWdnBxVnP3O
TLS_CERT_DIR=/opt/clavitor/certs
TLS_EMAIL=ops@clavitor.ai
ENVEOF
cat > /etc/systemd/system/clavitor.service << 'UNITEOF'
[Unit]
Description=Clavitor Vault
After=network-online.target
Wants=network-online.target
[Service]
Type=simple
ExecStart=/opt/clavitor/bin/clavitor
EnvironmentFile=/opt/clavitor/env
WorkingDirectory=/opt/clavitor/data
Restart=always
RestartSec=5
User=root
[Install]
WantedBy=multi-user.target
UNITEOF
systemctl daemon-reload
systemctl enable clavitor
systemctl restart clavitor
sleep 3
systemctl is-active clavitor`, dns)
	out, err := tsSshExec(sub, installScript)
	if err != nil {
		r.Error = fmt.Sprintf("deploy: %v\n%s", err, out)
		return r
	}
	log(cfg, " [%s] service: %s", pop.City, strings.TrimSpace(out))
	// --- Step 6: Verify ---
	log(cfg, " [%s] verifying TLS...", pop.City)
	time.Sleep(5 * time.Second)
	verifyURL := fmt.Sprintf("https://%s:1984/ping", dns)
	// NOTE(review): http.Get uses the default client with no timeout.
	resp, err := http.Get(verifyURL)
	if err == nil {
		resp.Body.Close()
		log(cfg, " [%s] TLS verified ✓", pop.City)
	} else {
		log(cfg, " [%s] TLS not ready yet (cert may take a minute): %v", pop.City, err)
	}
	r.OK = true
	r.Message = fmt.Sprintf("%s → %s → %s (%s) — live", instanceID, publicIP, dns, region)
	log(cfg, "\n [%s] DONE: %s\n", pop.City, r.Message)
	return r
}
// --- Subcommand: bootstrap ---
// For non-AWS instances: takes an IP + password, installs Tailscale, hardens, deploys vault.
//
// cmdBootstrap turns a reachable root-SSH VPS into a live POP: it joins
// the tailnet, disables password auth, configures a firewall, installs
// the vault binary + systemd unit, updates the DB and DNS, and probes
// TLS. `password` may be empty, in which case key-based SSH is used.
// Always returns a single-element NodeResult slice.
func cmdBootstrap(cfg Config, city, ip, password string) []NodeResult {
	// Find the POP in the DB
	allPOPs, err := readPOPs(cfg.DBPath)
	if err != nil {
		fatal("reading DB: %v", err)
	}
	var pop *POP
	for _, p := range allPOPs {
		if strings.EqualFold(p.City, city) {
			// Safe despite taking the loop variable's address: we break
			// immediately, so p is not overwritten afterwards.
			pop = &p
			break
		}
	}
	if pop == nil {
		fatal("city %q not found in DB", city)
	}
	// Derive subdomain/DNS: prefer what the DB already has, otherwise
	// default to <country>1.<zone>.
	sub := pop.Subdomain()
	if sub == "" {
		sub = strings.ToLower(pop.Country) + "1"
	}
	dns := sub + "." + cfg.Zone
	if pop.DNS == "" {
		pop.DNS = dns
	} else {
		dns = pop.DNS
		sub = pop.Subdomain()
	}
	r := NodeResult{Node: city, Action: "bootstrap"}
	if cfg.DryRun {
		r.OK = true
		r.Message = fmt.Sprintf("would bootstrap %s (%s) as %s", city, ip, dns)
		log(cfg, "DRY RUN: %s", r.Message)
		return []NodeResult{r}
	}
	// Helper: run command on fresh instance (password or key auth).
	// Returns stdout on success, stdout+stderr on failure.
	sshRaw := func(command string) (string, error) {
		var cmd *exec.Cmd
		sshOpts := []string{"-o", "StrictHostKeyChecking=no", "-o", "ConnectTimeout=10", "-o", "UserKnownHostsFile=/dev/null", "-o", "LogLevel=ERROR"}
		if password != "" {
			cmd = exec.Command("sshpass", append([]string{"-p", password, "ssh"}, append(sshOpts, "root@"+ip, command)...)...)
		} else {
			cmd = exec.Command("ssh", append(sshOpts, "root@"+ip, command)...)
		}
		var stdout, stderr bytes.Buffer
		cmd.Stdout = &stdout
		cmd.Stderr = &stderr
		err := cmd.Run()
		if err != nil {
			return stdout.String() + stderr.String(), err
		}
		return stdout.String(), nil
	}
	// Helper: copy a local file to the instance (password or key auth).
	scpRaw := func(localPath, remotePath string) error {
		sshOpts := []string{"-o", "StrictHostKeyChecking=no", "-o", "ConnectTimeout=10", "-o", "UserKnownHostsFile=/dev/null", "-o", "LogLevel=ERROR"}
		var cmd *exec.Cmd
		if password != "" {
			cmd = exec.Command("sshpass", append([]string{"-p", password, "scp"}, append(sshOpts, localPath, "root@"+ip+":"+remotePath)...)...)
		} else {
			cmd = exec.Command("scp", append(sshOpts, localPath, "root@"+ip+":"+remotePath)...)
		}
		out, err := cmd.CombinedOutput()
		if err != nil {
			return fmt.Errorf("%v: %s", err, string(out))
		}
		return nil
	}
	// --- Step 1: Test connectivity (and detect CPU architecture) ---
	log(cfg, " [%s] connecting to %s...", city, ip)
	out, err := sshRaw("hostname && uname -m")
	if err != nil {
		r.Error = fmt.Sprintf("ssh connect: %v\n%s", err, out)
		return []NodeResult{r}
	}
	arch := "amd64"
	if strings.Contains(out, "aarch64") {
		arch = "arm64"
	}
	log(cfg, " [%s] connected (%s)", city, strings.TrimSpace(out))
	// --- Step 2: System update (best effort: apt first, then yum) ---
	log(cfg, " [%s] updating system...", city)
	sshRaw("export DEBIAN_FRONTEND=noninteractive; apt-get update -qq && apt-get upgrade -y -qq && apt-get install -y -qq unattended-upgrades 2>/dev/null || (yum update -y -q 2>/dev/null) || true")
	log(cfg, " [%s] system updated", city)
	// --- Step 3: Install Tailscale (idempotent) ---
	log(cfg, " [%s] installing Tailscale...", city)
	out, err = sshRaw("command -v tailscale >/dev/null 2>&1 && echo 'already installed' || (curl -fsSL https://tailscale.com/install.sh | sh)")
	if err != nil {
		r.Error = fmt.Sprintf("tailscale install: %v", err)
		return []NodeResult{r}
	}
	log(cfg, " [%s] %s", city, lastLines(out, 1))
	// --- Step 4: Join tailnet ---
	log(cfg, " [%s] joining tailnet as %s...", city, sub)
	if cfg.TSAuthKey == "" {
		r.Error = "TS_AUTHKEY required for bootstrap"
		return []NodeResult{r}
	}
	// NOTE(review): the auth key is interpolated into the remote shell
	// command and is therefore visible in the remote process list.
	out, err = sshRaw(fmt.Sprintf("systemctl enable --now tailscaled 2>/dev/null; sleep 2; tailscale up --authkey=%s --hostname=%s --ssh --reset; tailscale ip -4", cfg.TSAuthKey, sub))
	if err != nil {
		r.Error = fmt.Sprintf("tailscale join: %v", err)
		return []NodeResult{r}
	}
	tsIP := strings.TrimSpace(out)
	log(cfg, " [%s] Tailscale IP: %s", city, tsIP)
	// --- Step 5: Harden (best effort; errors ignored) ---
	log(cfg, " [%s] hardening...", city)
	sshRaw(fmt.Sprintf("hostnamectl set-hostname %s 2>/dev/null; hostname %s 2>/dev/null", sub, sub))
	sshRaw(`sed -i 's/^#*PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config && systemctl restart sshd 2>/dev/null || systemctl restart ssh 2>/dev/null`)
	// Firewall: allow only 1984 + tailscale (ufw, then firewalld, then raw iptables)
	sshRaw(`command -v ufw >/dev/null 2>&1 && (ufw default deny incoming; ufw allow in on tailscale0; ufw allow 1984/tcp; ufw --force enable) || (command -v firewall-cmd >/dev/null 2>&1 && (firewall-cmd --permanent --add-port=1984/tcp; firewall-cmd --reload) || iptables -A INPUT -p tcp --dport 1984 -j ACCEPT)`)
	log(cfg, " [%s] hardened (password auth disabled, firewall set)", city)
	// --- Step 6: Deploy vault ---
	log(cfg, " [%s] deploying vault (%s)...", city, arch)
	binaryPath := resolveVaultBinary(cfg)
	// Check if we need the amd64 build instead of the default arm64 one
	if arch == "amd64" {
		amdPath := strings.Replace(binaryPath, "arm64", "amd64", 1)
		if _, err := os.Stat(amdPath); err == nil {
			binaryPath = amdPath
		}
	}
	// Now use Tailscale SSH (password auth may already be disabled)
	if err := scpToNode(sub, binaryPath, "/tmp/clavitor-new"); err != nil {
		// Fallback to password SCP if Tailscale not ready yet
		log(cfg, " [%s] Tailscale SCP not ready, using password...", city)
		if err := scpRaw(binaryPath, "/tmp/clavitor-new"); err != nil {
			r.Error = fmt.Sprintf("scp binary: %v", err)
			return []NodeResult{r}
		}
	}
	// Install script: env file + systemd unit, then restart and report
	// service state. NOTE(review): embeds a hard-coded CF_API_TOKEN and
	// telemetry token — consider sourcing these from config/secrets.
	installScript := fmt.Sprintf(`set -e
mkdir -p /opt/clavitor/bin /opt/clavitor/data /opt/clavitor/certs
mv /tmp/clavitor-new /opt/clavitor/bin/clavitor
chmod +x /opt/clavitor/bin/clavitor
cat > /opt/clavitor/env << 'ENVEOF'
PORT=1984
VAULT_MODE=hosted
DATA_DIR=/opt/clavitor/data
TELEMETRY_FREQ=30
TELEMETRY_HOST=https://clavitor.ai/telemetry
TELEMETRY_TOKEN=clavitor-fleet-2026
TLS_DOMAIN=%s
CF_API_TOKEN=dSVz7JZtyK023q7kh4MMNmIggK1dahWdnBxVnP3O
TLS_CERT_DIR=/opt/clavitor/certs
TLS_EMAIL=ops@clavitor.ai
ENVEOF
cat > /etc/systemd/system/clavitor.service << 'UNITEOF'
[Unit]
Description=Clavitor Vault
After=network-online.target
Wants=network-online.target
[Service]
Type=simple
ExecStart=/opt/clavitor/bin/clavitor
EnvironmentFile=/opt/clavitor/env
WorkingDirectory=/opt/clavitor/data
Restart=always
RestartSec=5
User=root
[Install]
WantedBy=multi-user.target
UNITEOF
systemctl daemon-reload
systemctl enable clavitor
systemctl restart clavitor
sleep 3
systemctl is-active clavitor`, dns)
	out, err = tsSshExec(sub, installScript)
	if err != nil {
		// Fallback to password SSH
		out, err = sshRaw(installScript)
	}
	if err != nil {
		r.Error = fmt.Sprintf("deploy: %v\n%s", err, out)
		return []NodeResult{r}
	}
	log(cfg, " [%s] service: %s", city, strings.TrimSpace(out))
	// --- Step 7: Update DB + DNS (best effort; errors ignored) ---
	log(cfg, " [%s] updating DB + DNS...", city)
	localDB, _ := sql.Open("sqlite", cfg.DBPath)
	localDB.Exec(`UPDATE pops SET ip=?, dns=?, status='live' WHERE pop_id=?`, ip, dns, pop.PopID)
	localDB.Close()
	if cfg.CFToken != "" {
		zoneID := cfg.CFZoneID
		if zoneID == "" {
			zoneID, _ = cfResolveZoneID(cfg.CFToken, cfg.Zone)
		}
		if zoneID != "" {
			cfCreateRecord(cfg.CFToken, zoneID, dns, ip)
		}
	}
	// --- Step 8: Verify TLS (informational only; failure is not fatal) ---
	log(cfg, " [%s] verifying TLS...", city)
	time.Sleep(5 * time.Second)
	// NOTE(review): http.Get uses the default client with no timeout.
	resp, err := http.Get(fmt.Sprintf("https://%s:1984/ping", dns))
	if err == nil {
		resp.Body.Close()
		log(cfg, " [%s] TLS verified", city)
	} else {
		log(cfg, " [%s] TLS not ready yet: %v", city, err)
	}
	r.OK = true
	r.Message = fmt.Sprintf("%s → %s (%s) — live", ip, dns, arch)
	log(cfg, "\n [%s] DONE: %s\n", city, r.Message)
	return []NodeResult{r}
}
// --- ishosting provisioner ---
// provisionISHosting orders the cheapest VPS for the POP's country from
// the ishosting API, pays the invoice, waits for an IP, then hands off to
// cmdBootstrap for Tailscale/hardening/deploy. Requires ISHOSTING_API_KEY.
func provisionISHosting(cfg Config, pop POP) NodeResult {
	r := NodeResult{Node: pop.City, Action: "provision"}
	if cfg.ISHostingKey == "" {
		r.Error = "ISHOSTING_API_KEY required"
		return r
	}
	// Derive subdomain/DNS, preferring any value already stored in the DB.
	sub := pop.Subdomain()
	if sub == "" {
		sub = strings.ToLower(pop.Country) + "1"
	}
	dns := sub + "." + cfg.Zone
	if pop.DNS != "" {
		dns = pop.DNS
		sub = pop.Subdomain()
	}
	if cfg.DryRun {
		r.OK = true
		r.Message = fmt.Sprintf("would provision %s via ishosting as %s", pop.City, dns)
		log(cfg, " [%s] DRY RUN: %s", pop.City, r.Message)
		return r
	}
	// Helper: one authenticated JSON call against the ishosting API.
	// Returns the decoded body; non-2xx status becomes an error (with the
	// partially decoded body still returned).
	ishAPI := func(method, path string, body interface{}) (map[string]interface{}, error) {
		var reqBody io.Reader
		if body != nil {
			data, _ := json.Marshal(body)
			reqBody = bytes.NewReader(data)
		}
		// NOTE(review): NewRequest error ignored; a malformed path would
		// cause a nil-request panic in Do.
		req, _ := http.NewRequest(method, "https://api.ishosting.com"+path, reqBody)
		req.Header.Set("X-Api-Token", cfg.ISHostingKey)
		req.Header.Set("Content-Type", "application/json")
		resp, err := http.DefaultClient.Do(req)
		if err != nil {
			return nil, err
		}
		defer resp.Body.Close()
		respBody, _ := io.ReadAll(resp.Body)
		var result map[string]interface{}
		json.Unmarshal(respBody, &result)
		if resp.StatusCode >= 400 {
			return result, fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(respBody))
		}
		return result, nil
	}
	// Step 1: Find cheapest plan for this country
	log(cfg, " [%s] finding plan for %s...", pop.City, pop.Country)
	plans, err := ishAPI("GET", "/services/vps/plans", nil)
	if err != nil {
		r.Error = fmt.Sprintf("list plans: %v", err)
		return r
	}
	// Find the cheapest plan available in this country
	var planCode string
	var planPrice float64 = 999 // sentinel: any real price below this wins
	if items, ok := plans["items"].([]interface{}); ok {
		for _, item := range items {
			plan, _ := item.(map[string]interface{})
			locations, _ := plan["locations"].([]interface{})
			for _, loc := range locations {
				locStr, _ := loc.(string)
				if strings.EqualFold(locStr, pop.Country) {
					price, _ := plan["price"].(float64)
					if price > 0 && price < planPrice {
						planPrice = price
						code, _ := plan["code"].(string)
						planCode = code
					}
				}
			}
		}
	}
	if planCode == "" {
		r.Error = fmt.Sprintf("no plan found for country %s", pop.Country)
		return r
	}
	log(cfg, " [%s] plan: %s ($%.2f/mo)", pop.City, planCode, planPrice)
	// Step 2: Place order
	log(cfg, " [%s] placing order...", pop.City)
	order := map[string]interface{}{
		"items": []map[string]interface{}{
			{
				"action":   "new",
				"type":     "vps",
				"plan":     planCode,
				"location": pop.Country,
				"additions": map[string]interface{}{
					"os": "linux/ubuntu22#64",
				},
			},
		},
	}
	orderResult, err := ishAPI("POST", "/billing/order", order)
	if err != nil {
		r.Error = fmt.Sprintf("place order: %v", err)
		return r
	}
	// Extract invoice ID (API may return it as a number or a string)
	invoiceID := ""
	if id, ok := orderResult["invoice_id"].(float64); ok {
		invoiceID = fmt.Sprintf("%.0f", id)
	} else if id, ok := orderResult["invoice_id"].(string); ok {
		invoiceID = id
	}
	if invoiceID == "" {
		r.Error = fmt.Sprintf("no invoice ID in response: %v", orderResult)
		return r
	}
	log(cfg, " [%s] invoice: %s", pop.City, invoiceID)
	// Step 3: Pay invoice
	log(cfg, " [%s] paying invoice...", pop.City)
	_, err = ishAPI("POST", "/billing/invoice/"+invoiceID+"/pay", nil)
	if err != nil {
		r.Error = fmt.Sprintf("pay invoice: %v", err)
		return r
	}
	// Step 4: Wait for provisioning + get IP (poll every 10s, up to 10 min)
	log(cfg, " [%s] waiting for provisioning...", pop.City)
	var publicIP string
	for i := 0; i < 60; i++ {
		time.Sleep(10 * time.Second)
		services, err := ishAPI("GET", "/services/list?locations="+pop.Country, nil)
		if err != nil {
			continue
		}
		// NOTE(review): this takes the IP of ANY active service in the
		// country — if an older service already exists there, its IP may
		// be picked up instead of the newly ordered one. Verify.
		if items, ok := services["items"].([]interface{}); ok {
			for _, item := range items {
				svc, _ := item.(map[string]interface{})
				status, _ := svc["status"].(string)
				if status == "active" {
					if ip, ok := svc["ip"].(string); ok && ip != "" {
						publicIP = ip
					} else if ips, ok := svc["ips"].([]interface{}); ok && len(ips) > 0 {
						if ip, ok := ips[0].(string); ok {
							publicIP = ip
						}
					}
				}
			}
		}
		if publicIP != "" {
			break
		}
		log(cfg, " [%s] still provisioning... (%ds)", pop.City, (i+1)*10)
	}
	if publicIP == "" {
		r.Error = "timeout waiting for IP"
		return r
	}
	log(cfg, " [%s] IP: %s", pop.City, publicIP)
	// Step 5: Update DB (best effort — open/exec errors ignored)
	localDB, _ := sql.Open("sqlite", cfg.DBPath)
	localDB.Exec(`UPDATE pops SET ip=?, dns=?, status='live' WHERE pop_id=?`, publicIP, dns, pop.PopID)
	localDB.Close()
	pop.IP = publicIP
	pop.DNS = dns
	// Step 6: Bootstrap (Tailscale, harden, deploy) — wait for SSH to be ready
	log(cfg, " [%s] waiting for SSH...", pop.City)
	time.Sleep(30 * time.Second)
	// Run bootstrap inline (key-based auth: empty password)
	bootstrapResult := cmdBootstrap(cfg, pop.City, publicIP, "")
	if len(bootstrapResult) > 0 && !bootstrapResult[0].OK {
		r.Error = fmt.Sprintf("bootstrap failed: %s", bootstrapResult[0].Error)
		return r
	}
	r.OK = true
	r.Message = fmt.Sprintf("%s → %s (ishosting/%s) — live", publicIP, dns, pop.Country)
	log(cfg, "\n [%s] DONE: %s\n", pop.City, r.Message)
	return r
}
// ensureSecurityGroup returns the ID of the POP security group in this
// region, creating it (single 0.0.0.0/0 ingress rule on TCP 1984) when
// neither the current nor the legacy group name exists yet.
func ensureSecurityGroup(ctx context.Context, client *ec2.Client, region, city string, cfg Config) (string, error) {
	// Reuse an existing group under either the current or the legacy name.
	for _, groupName := range []string{"clavitor-pop", "vault1984-pop"} {
		resp, err := client.DescribeSecurityGroups(ctx, &ec2.DescribeSecurityGroupsInput{
			Filters: []ec2types.Filter{{
				Name:   strPtr("group-name"),
				Values: []string{groupName},
			}},
		})
		if err != nil || len(resp.SecurityGroups) == 0 {
			continue
		}
		return *resp.SecurityGroups[0].GroupId, nil
	}
	// Nothing found — create the group fresh.
	log(cfg, " [%s] creating security group clavitor-pop in %s...", city, region)
	created, err := client.CreateSecurityGroup(ctx, &ec2.CreateSecurityGroupInput{
		GroupName:   strPtr("clavitor-pop"),
		Description: strPtr("Clavitor POP - port 1984 only"),
	})
	if err != nil {
		return "", fmt.Errorf("create sg: %w", err)
	}
	groupID := *created.GroupId
	var (
		proto = "tcp"
		port  = int32(1984)
	)
	if _, err := client.AuthorizeSecurityGroupIngress(ctx, &ec2.AuthorizeSecurityGroupIngressInput{
		GroupId: &groupID,
		IpPermissions: []ec2types.IpPermission{{
			IpProtocol: &proto,
			FromPort:   &port,
			ToPort:     &port,
			IpRanges:   []ec2types.IpRange{{CidrIp: strPtr("0.0.0.0/0")}},
		}},
	}); err != nil {
		return groupID, fmt.Errorf("add rule: %w", err)
	}
	return groupID, nil
}
// --- Node execution: Tailscale SSH with SSM fallback ---
// nodeExec runs a shell command on a node. Tailscale SSH is attempted
// first; if that fails and the POP has an AWS instance ID, the command
// is retried through SSM Run Command.
func nodeExec(cfg Config, pop POP, command string) (string, error) {
	out, err := tsSshExec(pop.Subdomain(), command)
	if err == nil || pop.InstanceID == "" {
		// Either it worked, or there is no SSM path to fall back to.
		return out, err
	}
	log(cfg, " [%s] TS SSH failed, falling back to SSM...", pop.Subdomain())
	return ssmRunCommand(pop.InstanceID, pop.RegionName, []string{command})
}
// nodePushFile copies a local file to a node over SCP (Tailscale hostname);
// cfg is currently unused but kept for interface symmetry with nodeExec.
func nodePushFile(cfg Config, pop POP, localPath, remotePath string) error {
	hostname := pop.Subdomain()
	return scpToNode(hostname, localPath, remotePath)
}
// tsSshExec runs a command as root on a tailnet host via plain ssh.
// On success only stdout is returned; on failure the combined
// stdout+stderr text accompanies the error.
func tsSshExec(hostname, command string) (string, error) {
	args := []string{
		"-o", "StrictHostKeyChecking=no",
		"-o", "ConnectTimeout=5",
		"-o", "UserKnownHostsFile=/dev/null",
		"-o", "LogLevel=ERROR",
		"root@" + hostname,
		command,
	}
	ssh := exec.Command("ssh", args...)
	var outBuf, errBuf bytes.Buffer
	ssh.Stdout = &outBuf
	ssh.Stderr = &errBuf
	if err := ssh.Run(); err != nil {
		return outBuf.String() + errBuf.String(), err
	}
	return outBuf.String(), nil
}
// scpToNode copies a local file to root@hostname:remotePath over scp,
// returning an error that embeds scp's combined output on failure.
func scpToNode(hostname, localPath, remotePath string) error {
	args := []string{
		"-o", "StrictHostKeyChecking=no",
		"-o", "ConnectTimeout=5",
		"-o", "UserKnownHostsFile=/dev/null",
		"-o", "LogLevel=ERROR",
		localPath,
		"root@" + hostname + ":" + remotePath,
	}
	output, err := exec.Command("scp", args...).CombinedOutput()
	if err != nil {
		return fmt.Errorf("%v: %s", err, string(output))
	}
	return nil
}
// --- SSM (for bootstrap only, before Tailscale is available) ---
// ssmBootstrapTailscale installs Tailscale on an EC2 instance through SSM
// Run Command, joins the tailnet under the POP's subdomain, and sets the
// machine hostname (best effort). Used before Tailscale SSH is reachable.
func ssmBootstrapTailscale(cfg Config, pop POP) error {
	host := pop.Subdomain()
	region := pop.RegionName

	// Install step is idempotent: the script exits early if already present.
	log(cfg, " Installing Tailscale...")
	installOut, err := ssmRunCommand(pop.InstanceID, region, []string{
		"command -v tailscale >/dev/null 2>&1 && echo 'already installed' && exit 0",
		"curl -fsSL https://tailscale.com/install.sh | sh",
	})
	if err != nil {
		return fmt.Errorf("install tailscale: %w", err)
	}
	log(cfg, " %s", lastLines(installOut, 2))

	log(cfg, " Joining tailnet as %s...", host)
	joinOut, err := ssmRunCommand(pop.InstanceID, region, []string{
		"systemctl enable --now tailscaled 2>/dev/null || true",
		"sleep 2",
		fmt.Sprintf("tailscale up --authkey=%s --hostname=%s --ssh --reset", cfg.TSAuthKey, host),
		"tailscale ip -4",
	})
	if err != nil {
		return fmt.Errorf("join tailnet: %w", err)
	}
	log(cfg, " TS IP: %s", strings.TrimSpace(joinOut))

	// Hostname setup is deliberately best-effort; failures are ignored.
	log(cfg, " Setting hostname...")
	_, _ = ssmRunCommand(pop.InstanceID, region, []string{
		fmt.Sprintf("hostnamectl set-hostname %s", host),
		fmt.Sprintf("grep -q '%s' /etc/hosts || echo '127.0.1.1 %s %s.%s' >> /etc/hosts", host, host, host, cfg.Zone),
	})
	return nil
}
// ssmRunCommand executes a shell script on an EC2 instance via AWS SSM
// Run Command using the aws CLI, polling up to 3 minutes for a terminal
// state. Returns the command's stdout on success.
func ssmRunCommand(instanceID, region string, commands []string) (string, error) {
	params := map[string][]string{"commands": commands}
	paramsJSON, _ := json.Marshal(params)
	// Parameters go through a temp file to avoid shell-quoting issues.
	tmpFile := fmt.Sprintf("/tmp/ssm-%s-%d.json", instanceID, time.Now().UnixNano())
	// FIX: the WriteFile error used to be ignored, so a full /tmp produced
	// a confusing send-command failure instead of a clear local error.
	if err := os.WriteFile(tmpFile, paramsJSON, 0600); err != nil {
		return "", fmt.Errorf("write ssm params: %w", err)
	}
	defer os.Remove(tmpFile)
	cmd := exec.Command("aws", "ssm", "send-command",
		"--instance-ids", instanceID,
		"--document-name", "AWS-RunShellScript",
		"--region", region,
		"--parameters", "file://"+tmpFile,
		"--query", "Command.CommandId",
		"--output", "text")
	out, err := cmd.CombinedOutput()
	if err != nil {
		return "", fmt.Errorf("send-command: %w\n%s", err, string(out))
	}
	cmdID := strings.TrimSpace(string(out))
	if cmdID == "" {
		return "", fmt.Errorf("empty command ID returned")
	}
	// Poll for up to 36 * 5s = 3 minutes.
	for i := 0; i < 36; i++ {
		time.Sleep(5 * time.Second)
		poll := exec.Command("aws", "ssm", "get-command-invocation",
			"--command-id", cmdID,
			"--instance-id", instanceID,
			"--region", region,
			"--query", "[Status,StandardOutputContent,StandardErrorContent]",
			"--output", "text")
		result, err := poll.CombinedOutput()
		if err != nil {
			// Invocation may not be registered yet; keep polling.
			continue
		}
		parts := strings.SplitN(strings.TrimSpace(string(result)), "\t", 3)
		if len(parts) < 1 {
			continue
		}
		switch parts[0] {
		case "Success":
			stdout := ""
			if len(parts) >= 2 {
				stdout = parts[1]
			}
			return stdout, nil
		case "Failed", "Cancelled", "TimedOut":
			// FIX: previously only "Failed" was treated as terminal, so
			// Cancelled/TimedOut invocations polled until the local 3-minute
			// timeout and then reported a misleading "timeout" error.
			stderr := ""
			if len(parts) >= 3 {
				stderr = parts[2]
			}
			return "", fmt.Errorf("command %s: %s", strings.ToLower(parts[0]), stderr)
		}
	}
	return "", fmt.Errorf("timeout waiting for SSM command %s", cmdID)
}
// --- DNS sync ---
// syncDNS reconciles Cloudflare A records with the desired POP set:
// records matching a POP are kept or updated, missing ones are created,
// and leftover A records (except the apex, www, and known non-POP
// subdomains) are deleted. In dry-run mode mutations are skipped but
// still logged and reported.
func syncDNS(cfg Config, pops []POP) []NodeResult {
	records, err := cfListRecords(cfg.CFToken, cfg.CFZoneID)
	if err != nil {
		fatal("listing DNS records: %v", err)
	}
	// Existing A records by FQDN, excluding the protected apex/www names.
	existing := map[string]cfDNSRecord{}
	protected := map[string]bool{cfg.Zone: true, "www." + cfg.Zone: true}
	for _, r := range records {
		if r.Type == "A" && !protected[r.Name] {
			existing[r.Name] = r
		}
	}
	// Desired state: every POP whose DNS name lives in this zone.
	desired := map[string]POP{}
	for _, p := range pops {
		if p.Zone() == cfg.Zone {
			desired[p.DNS] = p
		}
	}
	var results []NodeResult
	for fqdn, pop := range desired {
		r := NodeResult{Node: pop.Subdomain(), Action: "dns"}
		if rec, ok := existing[fqdn]; ok {
			if rec.Content == pop.IP {
				// Record already points at the right IP — nothing to do.
				r.OK = true
				r.Message = fmt.Sprintf("%s -> %s", fqdn, pop.IP)
				log(cfg, " OK %s -> %s", fqdn, pop.IP)
			} else {
				// IP drifted: repoint the record.
				// NOTE(review): in dry-run mode r.OK stays false here even
				// though nothing failed — confirm downstream exit-code logic.
				if !cfg.DryRun {
					if err := cfUpdateRecord(cfg.CFToken, cfg.CFZoneID, rec.ID, fqdn, pop.IP); err != nil {
						r.Error = err.Error()
					} else {
						r.OK = true
					}
				}
				r.Message = fmt.Sprintf("updated %s -> %s (was %s)", fqdn, pop.IP, rec.Content)
				log(cfg, " UPD %s: %s -> %s", fqdn, rec.Content, pop.IP)
			}
			// Consume the record so only stale entries remain below.
			delete(existing, fqdn)
		} else {
			// No record yet: create one.
			if !cfg.DryRun {
				if err := cfCreateRecord(cfg.CFToken, cfg.CFZoneID, fqdn, pop.IP); err != nil {
					r.Error = err.Error()
				} else {
					r.OK = true
				}
			}
			r.Message = fmt.Sprintf("created %s -> %s", fqdn, pop.IP)
			log(cfg, " ADD %s -> %s", fqdn, pop.IP)
		}
		results = append(results, r)
	}
	// Anything still in `existing` has no matching POP: delete it unless it
	// is outside this zone or a known non-POP subdomain (dev, api, ...).
	for fqdn, rec := range existing {
		sub := strings.TrimSuffix(fqdn, "."+cfg.Zone)
		if sub == fqdn || isKnownNonPOP(sub) {
			continue
		}
		r := NodeResult{Node: sub, Action: "dns-delete"}
		if !cfg.DryRun {
			if err := cfDeleteRecord(cfg.CFToken, cfg.CFZoneID, rec.ID); err != nil {
				r.Error = err.Error()
			} else {
				r.OK = true
			}
		}
		r.Message = fmt.Sprintf("deleted %s (%s)", fqdn, rec.Content)
		log(cfg, " DEL %s (%s)", fqdn, rec.Content)
		results = append(results, r)
	}
	return results
}
// isKnownNonPOP reports whether a subdomain is reserved infrastructure
// (dev, api, mail, ...) and must never be treated as a POP record.
func isKnownNonPOP(sub string) bool {
	switch sub {
	case "dev", "soc", "api", "mail", "mx", "ns1", "ns2", "status":
		return true
	}
	return false
}
// --- Tailscale sync ---
// syncTailscale reconciles tailnet membership with the desired POP set.
// POPs missing from the tailnet are joined via SSM when an instance ID
// and auth key are available; tailnet devices with no matching POP are
// deleted, but only if they look like infra devices.
func syncTailscale(cfg Config, pops []POP) []NodeResult {
	devices, err := tsListDevices(cfg.TSKey)
	if err != nil {
		fatal("listing Tailscale devices: %v", err)
	}
	// Desired state keyed by subdomain (the tailnet hostname).
	desired := map[string]POP{}
	for _, p := range pops {
		if sub := p.Subdomain(); sub != "" {
			desired[sub] = p
		}
	}
	devicesByHostname := map[string][]tsDevice{}
	for _, d := range devices {
		devicesByHostname[d.Hostname] = append(devicesByHostname[d.Hostname], d)
	}
	var results []NodeResult
	for sub, pop := range desired {
		r := NodeResult{Node: sub, Action: "tailscale"}
		if devs, ok := devicesByHostname[sub]; ok && len(devs) > 0 {
			// Already joined — report the first device's first address.
			tsIP := ""
			if len(devs[0].Addresses) > 0 {
				tsIP = devs[0].Addresses[0]
			}
			r.OK = true
			r.Message = fmt.Sprintf("TS:%s public:%s", tsIP, pop.IP)
			log(cfg, " OK %s — %s (TS: %s, public: %s)", sub, pop.City, tsIP, pop.IP)
			// Consume the entry so only stale devices remain below.
			delete(devicesByHostname, sub)
		} else {
			if pop.InstanceID != "" && cfg.TSAuthKey != "" && !cfg.DryRun {
				// AWS node: join the tailnet through SSM.
				log(cfg, " JOIN %s — %s via SSM (%s in %s)...", sub, pop.City, pop.InstanceID, pop.RegionName)
				if err := ssmBootstrapTailscale(cfg, pop); err != nil {
					r.Error = err.Error()
					log(cfg, " ERR: %v", err)
				} else {
					r.OK = true
					r.Message = "joined via SSM"
					log(cfg, " Joined tailnet as %s", sub)
				}
			} else if pop.InstanceID != "" && cfg.TSAuthKey != "" {
				// Dry run: report what would happen.
				r.Message = fmt.Sprintf("would SSM bootstrap %s in %s", pop.InstanceID, pop.RegionName)
				log(cfg, " JOIN %s — %s (would bootstrap)", sub, pop.City)
			} else if pop.InstanceID != "" {
				r.Error = "no -ts-authkey provided"
				log(cfg, " MISSING %s — needs -ts-authkey", sub)
			} else {
				// Non-AWS node with no SSM path — must be bootstrapped manually.
				r.Error = "no instance_id, cannot auto-join"
				log(cfg, " MISSING %s — no instance_id", sub)
			}
		}
		results = append(results, r)
	}
	// Remaining devices have no matching POP: remove infra-tagged ones.
	for hostname, devs := range devicesByHostname {
		for _, d := range devs {
			if !isInfraDevice(d) {
				continue
			}
			r := NodeResult{Node: hostname, Action: "tailscale-delete"}
			if !cfg.DryRun {
				if err := tsDeleteDevice(cfg.TSKey, d.ID); err != nil {
					r.Error = err.Error()
				} else {
					r.OK = true
					r.Message = "removed stale device"
				}
			} else {
				r.Message = "would remove stale device"
			}
			// NOTE(review): this logs "removed" even in dry-run mode.
			log(cfg, " STALE %s — removed", hostname)
			results = append(results, r)
		}
	}
	return results
}
// isInfraDevice reports whether a tailnet device is fleet infrastructure
// (its name carries a "tagged-" marker) and is thus safe to auto-remove.
func isInfraDevice(d tsDevice) bool {
	const marker = "tagged-"
	return strings.Contains(d.Name, marker)
}
// --- Parallel execution ---
// parallelExec runs fn over pops using at most `workers` goroutines and
// collects the results (in completion order, not input order).
func parallelExec(pops []POP, workers int, fn func(POP) NodeResult) []NodeResult {
	jobs := make(chan POP, len(pops))
	for _, p := range pops {
		jobs <- p
	}
	close(jobs)
	var (
		mu      sync.Mutex
		results []NodeResult
		wg      sync.WaitGroup
	)
	// Never spin up more workers than there are jobs.
	n := workers
	if len(pops) < n {
		n = len(pops)
	}
	for i := 0; i < n; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for p := range jobs {
				res := fn(p)
				mu.Lock()
				results = append(results, res)
				mu.Unlock()
			}
		}()
	}
	wg.Wait()
	return results
}
// --- DB ---
// loadLivePOPs returns every POP marked "live" that has both an IP and a
// DNS name; it exits fatally if the DB cannot be read.
func loadLivePOPs(cfg Config) []POP {
	pops, err := readPOPs(cfg.DBPath)
	if err != nil {
		fatal("reading DB: %v", err)
	}
	var live []POP
	for _, p := range pops {
		if p.Status != "live" || p.IP == "" || p.DNS == "" {
			continue
		}
		live = append(live, p)
	}
	return live
}
// readPOPs loads every row of the pops table, ordered by pop_id.
func readPOPs(dbPath string) ([]POP, error) {
	db, err := sql.Open("sqlite", dbPath)
	if err != nil {
		return nil, err
	}
	defer db.Close()
	const query = `SELECT pop_id, city, country, region_name, ip, dns, status, provider, instance_id, arch FROM pops ORDER BY pop_id`
	rows, err := db.Query(query)
	if err != nil {
		return nil, err
	}
	defer rows.Close()
	var pops []POP
	for rows.Next() {
		var p POP
		if scanErr := rows.Scan(&p.PopID, &p.City, &p.Country, &p.RegionName, &p.IP, &p.DNS, &p.Status, &p.Provider, &p.InstanceID, &p.Arch); scanErr != nil {
			return nil, scanErr
		}
		pops = append(pops, p)
	}
	// rows.Err surfaces any iteration error after the loop ends.
	return pops, rows.Err()
}
// filterNodes narrows pops to those whose subdomain appears in the
// comma-separated cfg.Nodes list; an empty list selects every node.
func filterNodes(cfg Config, pops []POP) []POP {
	if cfg.Nodes == "" {
		return pops
	}
	selected := map[string]bool{}
	for _, name := range strings.Split(cfg.Nodes, ",") {
		selected[strings.TrimSpace(name)] = true
	}
	var matched []POP
	for _, p := range pops {
		if !selected[p.Subdomain()] {
			continue
		}
		matched = append(matched, p)
	}
	return matched
}
// --- Output ---
// log prints a formatted line to stdout, suppressed entirely when JSON
// output mode is active (so machine output stays clean).
func log(cfg Config, format string, args ...interface{}) {
	if cfg.JSONOut {
		return
	}
	fmt.Printf(format+"\n", args...)
}
// outputResults emits the results as indented JSON on stdout when JSON
// mode is active; otherwise it does nothing (human output comes from log).
func outputResults(cfg Config, results []NodeResult) {
	if !cfg.JSONOut {
		return
	}
	enc := json.NewEncoder(os.Stdout)
	enc.SetIndent("", "  ")
	// FIX: the Encode error was silently discarded; report it on stderr so
	// a broken stdout (e.g. closed pipe) is at least visible.
	if err := enc.Encode(results); err != nil {
		fmt.Fprintf(os.Stderr, "encoding results: %v\n", err)
	}
}
// oneLineStatus collapses multi-line command output into one line,
// trimming each line, dropping empties, and joining with " | ".
func oneLineStatus(raw string) string {
	var kept []string
	for _, line := range strings.Split(strings.TrimSpace(raw), "\n") {
		if trimmed := strings.TrimSpace(line); trimmed != "" {
			kept = append(kept, trimmed)
		}
	}
	return strings.Join(kept, " | ")
}
// lastLines returns the final n lines of s (whitespace-trimmed); when s
// has n or fewer lines the whole trimmed string comes back.
func lastLines(s string, n int) string {
	trimmed := strings.TrimSpace(s)
	lines := strings.Split(trimmed, "\n")
	if len(lines) <= n {
		return trimmed
	}
	return strings.Join(lines[len(lines)-n:], "\n")
}
// requireKeys exits fatally unless every named credential ("cf" for the
// Cloudflare token, "ts" for the Tailscale API key) is configured.
func requireKeys(cfg Config, keys ...string) {
	for _, key := range keys {
		switch key {
		case "cf":
			if cfg.CFToken != "" {
				continue
			}
			fatal("Cloudflare token required: set CF_API_TOKEN or -cf-token")
		case "ts":
			if cfg.TSKey != "" {
				continue
			}
			fatal("Tailscale API key required: set TS_API_KEY or -ts-key")
		}
	}
}
// fatal prints a FATAL-prefixed message to stderr and exits with the
// tool's "fatal / config error" status (2).
func fatal(format string, args ...interface{}) {
	msg := fmt.Sprintf("FATAL: "+format+"\n", args...)
	fmt.Fprint(os.Stderr, msg)
	os.Exit(2)
}
// --- Cloudflare API ---

// cfDNSRecord is one DNS record as returned by the Cloudflare v4 API.
type cfDNSRecord struct {
	ID      string `json:"id"`
	Type    string `json:"type"`    // record type, e.g. "A"
	Name    string `json:"name"`    // fully-qualified record name
	Content string `json:"content"` // record value (the IP for A records)
	Proxied bool   `json:"proxied"`
	TTL     int    `json:"ttl"`
}

// cfListResponse is the envelope returned by DNS record list calls.
type cfListResponse struct {
	Success bool          `json:"success"`
	Result  []cfDNSRecord `json:"result"`
}

// cfMutateResponse is the envelope returned by record create/update calls.
type cfMutateResponse struct {
	Success bool        `json:"success"`
	Result  cfDNSRecord `json:"result"`
	Errors  []struct {
		Message string `json:"message"`
	} `json:"errors"`
}
// cfResolveZoneID looks up the Cloudflare zone ID for a zone name.
func cfResolveZoneID(token, zone string) (string, error) {
	req, _ := http.NewRequest("GET", "https://api.cloudflare.com/client/v4/zones?name="+zone, nil)
	req.Header.Set("Authorization", "Bearer "+token)
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()
	// FIX: fail fast on non-200 responses. Previously an auth/permission
	// error body decoded as an empty result list and was misreported as
	// "zone not found".
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("cloudflare zones: HTTP %d", resp.StatusCode)
	}
	var result struct {
		Result []struct {
			ID string `json:"id"`
		} `json:"result"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return "", err
	}
	if len(result.Result) == 0 {
		return "", fmt.Errorf("zone %q not found", zone)
	}
	return result.Result[0].ID, nil
}
// cfListRecords fetches every DNS record in a zone, paging 100 at a time.
// A page returning fewer than 100 records is treated as the last page.
func cfListRecords(token, zoneID string) ([]cfDNSRecord, error) {
	var all []cfDNSRecord
	page := 1
	for {
		url := fmt.Sprintf("https://api.cloudflare.com/client/v4/zones/%s/dns_records?per_page=100&page=%d", zoneID, page)
		req, _ := http.NewRequest("GET", url, nil)
		req.Header.Set("Authorization", "Bearer "+token)
		resp, err := http.DefaultClient.Do(req)
		if err != nil {
			return nil, err
		}
		var result cfListResponse
		decErr := json.NewDecoder(resp.Body).Decode(&result)
		status := resp.StatusCode
		resp.Body.Close()
		// FIX: previously neither the HTTP status nor the API's success
		// flag was checked, so an error page silently ended pagination and
		// returned a truncated record list — which syncDNS would then treat
		// as "records missing" and recreate.
		if status != http.StatusOK {
			return nil, fmt.Errorf("cloudflare list records: HTTP %d (page %d)", status, page)
		}
		if decErr != nil {
			return nil, decErr
		}
		if !result.Success {
			return nil, fmt.Errorf("cloudflare list records: success=false (page %d)", page)
		}
		all = append(all, result.Result...)
		if len(result.Result) < 100 {
			break
		}
		page++
	}
	return all, nil
}
// cfCreateRecord creates an unproxied A record fqdn -> ip (TTL "auto").
func cfCreateRecord(token, zoneID, fqdn, ip string) error {
	payload, _ := json.Marshal(map[string]interface{}{
		"type": "A", "name": fqdn, "content": ip, "ttl": 1, "proxied": false,
	})
	endpoint := fmt.Sprintf("https://api.cloudflare.com/client/v4/zones/%s/dns_records", zoneID)
	req, _ := http.NewRequest("POST", endpoint, bytes.NewReader(payload))
	req.Header.Set("Authorization", "Bearer "+token)
	req.Header.Set("Content-Type", "application/json")
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	var parsed cfMutateResponse
	if err := json.NewDecoder(resp.Body).Decode(&parsed); err != nil {
		return err
	}
	if parsed.Success {
		return nil
	}
	return fmt.Errorf("API error: %v", parsed.Errors)
}
// cfUpdateRecord overwrites the existing record recordID with an unproxied
// A record fqdn -> ip. TTL 1 means "automatic" in the Cloudflare API.
func cfUpdateRecord(token, zoneID, recordID, fqdn, ip string) error {
	// Marshal of a map[string]interface{} with these value types cannot fail.
	body, _ := json.Marshal(map[string]interface{}{
		"type": "A", "name": fqdn, "content": ip, "ttl": 1, "proxied": false,
	})
	url := fmt.Sprintf("https://api.cloudflare.com/client/v4/zones/%s/dns_records/%s", zoneID, recordID)
	req, err := http.NewRequest("PUT", url, bytes.NewReader(body))
	if err != nil {
		return err
	}
	req.Header.Set("Authorization", "Bearer "+token)
	req.Header.Set("Content-Type", "application/json")
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	raw, err := io.ReadAll(resp.Body)
	if err != nil {
		return err
	}
	var result cfMutateResponse
	// A gateway/HTML error body won't parse as JSON; surface the status and
	// raw body instead of an opaque decode error.
	if err := json.Unmarshal(raw, &result); err != nil {
		return fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(raw))
	}
	if !result.Success {
		return fmt.Errorf("API error: %v", result.Errors)
	}
	return nil
}
// cfDeleteRecord deletes the DNS record recordID from the zone. Any
// response that does not carry {"success": true} — including a non-JSON
// error page — is reported with the raw body for debugging.
func cfDeleteRecord(token, zoneID, recordID string) error {
	url := fmt.Sprintf("https://api.cloudflare.com/client/v4/zones/%s/dns_records/%s", zoneID, recordID)
	req, err := http.NewRequest("DELETE", url, nil)
	if err != nil {
		return err
	}
	req.Header.Set("Authorization", "Bearer "+token)
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return err
	}
	var result struct {
		Success bool `json:"success"`
	}
	// A decode failure leaves Success false, so both "unparseable body" and
	// "API said no" take the same error path with the body attached.
	if err := json.Unmarshal(body, &result); err != nil || !result.Success {
		return fmt.Errorf("delete failed: %s", string(body))
	}
	return nil
}
// --- Tailscale API ---

// tsDevice is one machine on the tailnet as reported by the Tailscale v2
// API; Addresses holds its tailnet IPs.
type tsDevice struct {
	ID        string   `json:"id"`
	Hostname  string   `json:"hostname"`
	Name      string   `json:"name"`
	Addresses []string `json:"addresses"`
}
// tsListResponse is the envelope returned by the Tailscale device-list
// endpoint.
type tsListResponse struct {
	Devices []tsDevice `json:"devices"`
}
// tsListDevices returns every device on the default tailnet ("-"), using
// the API key as the basic-auth username with an empty password.
func tsListDevices(apiKey string) ([]tsDevice, error) {
	const endpoint = "https://api.tailscale.com/api/v2/tailnet/-/devices"
	req, _ := http.NewRequest("GET", endpoint, nil)
	req.SetBasicAuth(apiKey, "")
	resp, doErr := http.DefaultClient.Do(req)
	if doErr != nil {
		return nil, doErr
	}
	defer resp.Body.Close()
	if resp.StatusCode != 200 {
		raw, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(raw))
	}
	var payload tsListResponse
	if decodeErr := json.NewDecoder(resp.Body).Decode(&payload); decodeErr != nil {
		return nil, decodeErr
	}
	return payload.Devices, nil
}
// tsDeleteDevice removes the device with the given ID from the tailnet,
// authenticating with the API key via basic auth (empty password).
func tsDeleteDevice(apiKey, deviceID string) error {
	req, _ := http.NewRequest("DELETE", "https://api.tailscale.com/api/v2/device/"+deviceID, nil)
	req.SetBasicAuth(apiKey, "")
	resp, doErr := http.DefaultClient.Do(req)
	if doErr != nil {
		return doErr
	}
	defer resp.Body.Close()
	if resp.StatusCode == 200 {
		return nil
	}
	raw, _ := io.ReadAll(resp.Body)
	return fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(raw))
}