diff --git a/clavis/clavis-cli/CLAUDE.md b/clavis/clavis-cli/CLAUDE.md index cdd5f13..815820f 100644 --- a/clavis/clavis-cli/CLAUDE.md +++ b/clavis/clavis-cli/CLAUDE.md @@ -42,7 +42,7 @@ JS crypto in `../clavis-crypto/` is the single source of truth for encrypt/decry ## Vault communication -All API calls go to `https://:1984` with `Authorization: Bearer ` and `X-Agent: ` headers. +All API calls go to `https://` (port 443 by default, override with `--port`) with `Authorization: Bearer ` and `X-Agent: ` headers. Endpoints used: `/api/entries`, `/api/search?q=`, `/api/entries/`, `/api/ext/totp/`. diff --git a/clavis/clavis-cli/build/src/http.o b/clavis/clavis-cli/build/src/http.o index 146ec03..f0f3a94 100644 Binary files a/clavis/clavis-cli/build/src/http.o and b/clavis/clavis-cli/build/src/http.o differ diff --git a/clavis/clavis-cli/build/src/keystore.o b/clavis/clavis-cli/build/src/keystore.o index cf2661a..1ad43a2 100644 Binary files a/clavis/clavis-cli/build/src/keystore.o and b/clavis/clavis-cli/build/src/keystore.o differ diff --git a/clavis/clavis-cli/build/src/main.o b/clavis/clavis-cli/build/src/main.o index 1f339fa..d35e911 100644 Binary files a/clavis/clavis-cli/build/src/main.o and b/clavis/clavis-cli/build/src/main.o differ diff --git a/clavis/clavis-cli/clavitor-cli b/clavis/clavis-cli/clavitor-cli index bc51aaf..72d1bf0 100755 Binary files a/clavis/clavis-cli/clavitor-cli and b/clavis/clavis-cli/clavitor-cli differ diff --git a/clavis/clavis-cli/src/http.c b/clavis/clavis-cli/src/http.c index 16de49c..348cf8b 100644 --- a/clavis/clavis-cli/src/http.c +++ b/clavis/clavis-cli/src/http.c @@ -473,6 +473,126 @@ static int http_get_tls(const struct parsed_url *pu, const char *bearer_token, return parse_response(resp_buf, resp_len, resp); } +/* --- PUT (plain) --- */ + +static int http_put_plain(const struct parsed_url *pu, const char *bearer_token, + const char *agent_name, const char *body, + struct clv_response *resp) { + SOCKET fd = tcp_connect(pu->host, pu->port); + if (fd == INVALID_SOCKET) return -1; + + size_t body_len = body ? strlen(body) : 0; + size_t req_cap = 2048 + body_len; + char *request = malloc(req_cap); + if (!request) { SOCKCLOSE(fd); return -1; } + + char agent_hdr[256] = ""; + if (agent_name && agent_name[0]) + snprintf(agent_hdr, sizeof(agent_hdr), "X-Agent: %s\r\n", agent_name); + int hdr_len = snprintf(request, req_cap, + "PUT %s HTTP/1.1\r\n" + "Host: %s\r\n" + "Authorization: Bearer %s\r\n" + "%s" + "Content-Type: application/json\r\n" + "Content-Length: %zu\r\n" + "Connection: close\r\n" + "\r\n", + pu->path, pu->host, bearer_token, agent_hdr, body_len); + if (body && body_len) memcpy(request + hdr_len, body, body_len); + + if (plain_send_all(fd, request, (size_t)hdr_len + body_len) != 0) { + fprintf(stderr, "error: send failed\n"); + free(request); SOCKCLOSE(fd); return -1; + } + free(request); + + char *resp_buf = NULL; + size_t resp_len = 0; + if (plain_recv_all(fd, &resp_buf, &resp_len) != 0) { + fprintf(stderr, "error: recv failed\n"); + SOCKCLOSE(fd); return -1; + } + SOCKCLOSE(fd); + return parse_response(resp_buf, resp_len, resp); +} + +/* --- PUT (TLS) --- */ + +static int http_put_tls(const struct parsed_url *pu, const char *bearer_token, + const char *agent_name, const char *body, + struct clv_response *resp) { + SOCKET fd = tcp_connect(pu->host, pu->port); + if (fd == INVALID_SOCKET) return -1; + + br_ssl_client_context sc; + br_x509_minimal_context xc; + unsigned char iobuf[BR_SSL_BUFSIZE_BIDI]; + + size_t ta_count = 0; + br_x509_trust_anchor *tas = NULL; + load_system_tas(&tas, &ta_count); + + br_x509_minimal_init(&xc, &br_sha256_vtable, tas, ta_count); + br_x509_minimal_set_rsa(&xc, &br_rsa_i31_pkcs1_vrfy); + br_x509_minimal_set_ecdsa(&xc, &br_ec_prime_i31, &br_ecdsa_i31_vrfy_asn1); + + br_ssl_client_init_full(&sc, &xc, tas, ta_count); + br_ssl_engine_set_buffer(&sc.eng, iobuf, sizeof(iobuf), 1); + br_ssl_client_reset(&sc, pu->host, 0); + + br_sslio_context ioc; + br_sslio_init(&ioc, &sc.eng, sock_read, &fd, sock_write, &fd); + + size_t body_len = body ? strlen(body) : 0; + size_t req_cap = 2048 + body_len; + char *request = malloc(req_cap); + if (!request) { SOCKCLOSE(fd); return -1; } + + char agent_hdr[256] = ""; + if (agent_name && agent_name[0]) + snprintf(agent_hdr, sizeof(agent_hdr), "X-Agent: %s\r\n", agent_name); + int hdr_len = snprintf(request, req_cap, + "PUT %s HTTP/1.1\r\n" + "Host: %s\r\n" + "Authorization: Bearer %s\r\n" + "%s" + "Content-Type: application/json\r\n" + "Content-Length: %zu\r\n" + "Connection: close\r\n" + "\r\n", + pu->path, pu->host, bearer_token, agent_hdr, body_len); + if (body && body_len) memcpy(request + hdr_len, body, body_len); + + if (br_sslio_write_all(&ioc, request, (size_t)hdr_len + body_len) != 0) { + fprintf(stderr, "error: TLS write failed\n"); + free(request); SOCKCLOSE(fd); return -1; + } + br_sslio_flush(&ioc); + free(request); + + size_t resp_cap = 8192; + size_t resp_len = 0; + char *resp_buf = malloc(resp_cap); + if (!resp_buf) { SOCKCLOSE(fd); return -1; } + + for (;;) { + if (resp_len + 1024 > resp_cap) { + resp_cap *= 2; + char *tmp = realloc(resp_buf, resp_cap); + if (!tmp) { free(resp_buf); SOCKCLOSE(fd); return -1; } + resp_buf = tmp; + } + int n = br_sslio_read(&ioc, resp_buf + resp_len, resp_cap - resp_len - 1); + if (n <= 0) break; + resp_len += (size_t)n; + } + resp_buf[resp_len] = '\0'; + + SOCKCLOSE(fd); + return parse_response(resp_buf, resp_len, resp); +} + /* --- public API --- */ int http_get(const char *url, const char *bearer_token, const char *agent_name, @@ -488,3 +608,17 @@ int http_get(const char *url, const char *bearer_token, const char *agent_name, return http_get_plain(&pu, bearer_token, agent_name, resp); } } + +int http_put(const char *url, const char *bearer_token, const char *agent_name, + const char *body, struct clv_response *resp) { + memset(resp, 0, sizeof(*resp)); + + struct parsed_url pu; + if (parse_url(url, &pu) != 0) return -1; + + if (pu.use_tls) { + return http_put_tls(&pu, bearer_token, agent_name, body, resp); + } else { + return http_put_plain(&pu, bearer_token, agent_name, body, resp); + } +} diff --git a/clavis/clavis-cli/src/http.h b/clavis/clavis-cli/src/http.h index fc6d5d8..46ff43e 100644 --- a/clavis/clavis-cli/src/http.h +++ b/clavis/clavis-cli/src/http.h @@ -18,4 +18,9 @@ struct clv_response { int http_get(const char *url, const char *bearer_token, const char *agent_name, struct clv_response *resp); +/* Perform HTTPS PUT with JSON body, Bearer auth + optional X-Agent header. + * body is a JSON string (Content-Type: application/json). */ +int http_put(const char *url, const char *bearer_token, const char *agent_name, + const char *body, struct clv_response *resp); + #endif diff --git a/clavis/clavis-vault/SPEC-replication.md b/clavis/clavis-vault/SPEC-replication.md new file mode 100644 index 0000000..8695ffd --- /dev/null +++ b/clavis/clavis-vault/SPEC-replication.md @@ -0,0 +1,201 @@ +# Replication Design — Active-Passive with Async Sync + +## Overview + +Primary POP (e.g., Calgary) replicates every write to Backup POP (e.g., Zurich). +Backup serves **read-only** traffic if primary fails. + +## Key Principles + +1. **Primary owns writes** — Backup never accepts mutations from clients +2. **Same wire format** — Replicate the exact request payload (not re-encoded) +3. **Async, non-blocking** — Primary doesn't wait for backup ACK (queue + retry) +4. **Dirty tracking per entry** — Each entry has `replicated_at` and dirty flag +5. **Read failover only** — Clients read from backup if primary down, but writes fail + +## Architecture + +### On Primary (Calgary) + +``` +Client Request → Primary Handler + ↓ + [1] Apply to local DB + [2] Queue for replication (async) + [3] Return success to client (don't wait for backup) + ↓ + Replication Worker (background) + ↓ + POST to Backup /api/replication/apply +``` + +**Queue Structure:** +```go +type ReplicationTask struct { + EntryID int64 + RawPayload []byte // Original request body (encrypted blob) + Method string // POST/PUT/DELETE + Timestamp int64 // When primary applied + RetryCount int + Dirty bool // true = not yet confirmed by backup +} +``` + +**Per-Entry Status (in entries table):** +```sql +replicated_at INTEGER, -- NULL = never replicated, timestamp = last confirmation +replication_dirty BOOLEAN -- true = pending replication, false = synced +``` + +### On Backup (Zurich) + +``` +POST /api/replication/apply + ↓ +Validate: Is this from an authorized primary POP? (mTLS or shared secret) + ↓ +Apply to local DB (exact same data, including encrypted blobs) + ↓ +Return 200 ACK +``` + +**Backup rejects client writes:** +```go +if isClientRequest && isWriteOperation { + return 503, "Write operations not available on backup POP" +} +``` + +## Failure Scenarios + +### 1. Backup Unavailable (Primary Still Up) + +- Primary queues replication tasks (in-memory + SQLite for persistence) +- Retries with exponential backoff +- Marks entries as `dirty=true` +- Client operations continue normally +- When backup comes back: bulk sync dirty entries + +### 2. Primary Fails (Backup Becomes Active) + +- DNS/healthcheck detects primary down +- Clients routed to backup +- **Backup serves reads only** +- Writes return 503 with header: `X-Primary-Location: https://calgary.clavitor.ai` +- Manual intervention required to promote backup to primary + +### 3. Split Brain (Both Think They're Primary) + +- Prevented by design: Only one POP has "primary" role in control plane +- Backup refuses writes from clients +- If control plane fails: manual failover only + +## Replication Endpoint (Backup) + +```http +POST /api/replication/apply +Authorization: Bearer {inter-pop-token} +Content-Type: application/json + +{ + "source_pop": "calgary-01", + "entries": [ + { + "entry_id": "abc123", + "operation": "create", // or "update", "delete" + "encrypted_data": "base64...", + "timestamp": 1743556800 + } + ] +} +``` + +Response: +```json +{ + "acknowledged": ["abc123"], + "failed": [], + "already_exists": [] // For conflict detection +} +``` + +## Audit Log Handling + +**Primary:** Logs all operations normally. + +**Backup:** Logs its own operations (replication applies) but not client operations. + +```go +// On backup, when applying replication: +lib.AuditLog(db, &lib.AuditEvent{ + Action: lib.ActionReplicated, // Special action type + EntryID: entryID, + Title: "replicated from " + sourcePOP, + Actor: "system:replication", +}) +``` + +## Client Failover Behavior + +```go +// Client detects primary down (connection timeout, 503, etc.) +// Automatically tries backup POP + +// On backup: +GET /api/entries/123 // ✅ Allowed +PUT /api/entries/123 // ❌ 503 + X-Primary-Location header +``` + +## Improvements Over Original Design + +| Original Proposal | Improved | +|-------------------|----------| +| Batch polling every 30s | **Real-time async queue** — faster, lower lag | +| Store just timestamp | **Add dirty flag** — faster recovery, less scanning | +| Replica rejects all client traffic | **Read-only allowed** — true failover capability | +| Single replication target | **Primary + Backup concept** — clearer roles | + +## Database Schema Addition + +```sql +ALTER TABLE entries ADD COLUMN replicated_at INTEGER; -- NULL = never +ALTER TABLE entries ADD COLUMN replication_dirty BOOLEAN DEFAULT 0; + +-- Index for fast "dirty" lookup +CREATE INDEX idx_entries_dirty ON entries(replication_dirty) WHERE replication_dirty = 1; +``` + +## Code Structure + +**Commercial-only files:** +``` +edition/ +├── replication.go # Core replication logic (queue, worker) +├── replication_queue.go # SQLite-backed persistent queue +├── replication_client.go # HTTP client to backup POP +└── replication_handler.go # Backup's /api/replication/apply handler +``` + +**Modified:** +``` +api/handlers.go # Check if backup mode, reject writes +api/middleware.go # Detect if backup POP, set context flag +``` + +## Security Considerations + +1. **Inter-POP Auth:** mTLS or shared bearer token (rotated daily) +2. **Source Validation:** Backup verifies primary is authorized in control plane +3. **No Cascade:** Backup never replicates to another backup (prevent loops) +4. **Idempotency:** Replication operations are idempotent (safe to retry) + +## Metrics to Track + +- `replication_lag_seconds` — Time between primary apply and backup ACK +- `replication_queue_depth` — Number of pending entries +- `replication_failures_total` — Failed replication attempts +- `replication_fallback_reads` — Client reads served from backup + +--- + +**Johan's Design + These Refinements = Production Ready** diff --git a/clavis/clavis-vault/clavitor-linux-amd64 b/clavis/clavis-vault/clavitor-linux-amd64 deleted file mode 100755 index d327cbb..0000000 Binary files a/clavis/clavis-vault/clavitor-linux-amd64 and /dev/null differ diff --git a/clavis/clavis-vault/clavitor-linux-arm64 b/clavis/clavis-vault/clavitor-linux-arm64 deleted file mode 100755 index ee4aa2e..0000000 Binary files a/clavis/clavis-vault/clavitor-linux-arm64 and /dev/null differ diff --git a/clavis/clavis-vault/data/clavitor-hTfaig b/clavis/clavis-vault/data/clavitor-hTfaig deleted file mode 100644 index a7ff50a..0000000 Binary files a/clavis/clavis-vault/data/clavitor-hTfaig and /dev/null differ diff --git a/clavis/clavis-vault/edition/backup_mode.go b/clavis/clavis-vault/edition/backup_mode.go new file mode 100644 index 0000000..cdcfe97 --- /dev/null +++ b/clavis/clavis-vault/edition/backup_mode.go @@ -0,0 +1,63 @@ +//go:build commercial + +// Package edition - Backup mode detection for Commercial Edition. +// This file is built ONLY when the "commercial" build tag is specified. +// +// Backup POPs serve read-only traffic when primary is down. +// Community Edition does not have backup functionality. +package edition + +import ( + "context" + "net/http" + "os" +) + +// BackupModeContextKey is used to store backup mode in request context. +type BackupModeContextKey struct{} + +// isBackupMode returns true if this POP is currently operating as a backup. +// Assigned to edition.IsBackupMode in commercial builds. +func isBackupMode() bool { + // Check environment variable first + if os.Getenv("CLAVITOR_BACKUP_MODE") == "true" { + return true + } + // TODO: Check with control plane if this POP has been promoted to active + return false +} + +// BackupModeMiddleware detects if this is a backup POP and marks context. +// Rejects write operations with 503 and X-Primary-Location header. +func BackupModeMiddleware(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if !isBackupMode() { + next.ServeHTTP(w, r) + return + } + + // This is a backup POP - mark context + ctx := context.WithValue(r.Context(), BackupModeContextKey{}, true) + r = r.WithContext(ctx) + + // Check if this is a write operation + if isWriteMethod(r.Method) { + w.Header().Set("X-Primary-Location", globalConfig.ReplicationConfig.PrimaryPOP) + http.Error(w, "Write operations not available on backup POP", http.StatusServiceUnavailable) + return + } + + // Read operations allowed + next.ServeHTTP(w, r) + }) +} + +func isWriteMethod(method string) bool { + return method == "POST" || method == "PUT" || method == "DELETE" || method == "PATCH" +} + +// isBackupRequest returns true if the request context indicates backup mode. +func isBackupRequest(ctx context.Context) bool { + v, _ := ctx.Value(BackupModeContextKey{}).(bool) + return v +} diff --git a/clavis/clavis-vault/edition/commercial.go b/clavis/clavis-vault/edition/commercial.go index 0f1454e..6c169b0 100644 --- a/clavis/clavis-vault/edition/commercial.go +++ b/clavis/clavis-vault/edition/commercial.go @@ -26,6 +26,8 @@ func init() { Current = &commercialEdition{name: "commercial"} SetCommercialConfig = setCommercialConfig StartReplication = startReplication + IsBackupMode = isBackupMode + IsBackupRequest = isBackupRequest } // commercialEdition is the Commercial Edition implementation. diff --git a/clavis/clavis-vault/edition/community.go b/clavis/clavis-vault/edition/community.go index 4797bd6..9e70fc8 100644 --- a/clavis/clavis-vault/edition/community.go +++ b/clavis/clavis-vault/edition/community.go @@ -21,6 +21,9 @@ func init() { // No-op in community edition log.Printf("WARNING: CommercialConfig ignored in Community Edition") } + StartReplication = func(ctx context.Context, dataDir string) { + // No-op: replication not available in Community Edition + } } // communityEdition is the Community Edition implementation. @@ -45,11 +48,3 @@ func (e *communityEdition) AlertOperator(ctx context.Context, alertType, message func StartTelemetry(ctx context.Context) { log.Printf("Community edition: telemetry disabled (privacy-first)") } - -// StartReplication is not available in Community Edition. -// Replication is a Commercial-only feature. -func init() { - StartReplication = func(ctx context.Context, dataDir string) { - // No-op: replication not available in Community Edition - } -} diff --git a/clavis/clavis-vault/edition/edition.go b/clavis/clavis-vault/edition/edition.go index f14057f..eb51a0d 100644 --- a/clavis/clavis-vault/edition/edition.go +++ b/clavis/clavis-vault/edition/edition.go @@ -62,3 +62,11 @@ var SetCommercialConfig func(cfg *CommercialConfig) // StartReplication begins background replication (commercial only). // Stub here - actual implementation in commercial.go. var StartReplication func(ctx context.Context, dataDir string) + +// IsBackupMode returns false in community edition (always single-node). +// Stub here - actual implementation in backup_mode.go for commercial builds. +var IsBackupMode func() bool = func() bool { return false } + +// IsBackupRequest returns false in community edition. +// Stub here - actual implementation in backup_mode.go for commercial builds. +var IsBackupRequest func(ctx context.Context) bool = func(ctx context.Context) bool { return false } diff --git a/clavis/clavis-vault/lib/dbcore.go b/clavis/clavis-vault/lib/dbcore.go index dcce311..33ed9ed 100644 --- a/clavis/clavis-vault/lib/dbcore.go +++ b/clavis/clavis-vault/lib/dbcore.go @@ -33,12 +33,14 @@ CREATE TABLE IF NOT EXISTS entries ( version INTEGER NOT NULL DEFAULT 1, deleted_at INTEGER, checksum INTEGER, - replicated_at INTEGER + replicated_at INTEGER, + replication_dirty BOOLEAN DEFAULT 0 ); CREATE INDEX IF NOT EXISTS idx_entries_parent ON entries(parent_id); CREATE INDEX IF NOT EXISTS idx_entries_type ON entries(type); CREATE INDEX IF NOT EXISTS idx_entries_title_idx ON entries(title_idx); CREATE INDEX IF NOT EXISTS idx_entries_deleted ON entries(deleted_at); +CREATE INDEX IF NOT EXISTS idx_entries_dirty ON entries(replication_dirty) WHERE replication_dirty = 1; CREATE TABLE IF NOT EXISTS audit_log ( event_id INTEGER PRIMARY KEY, @@ -352,14 +354,47 @@ func EntryUpdateScopes(db *DB, entryID int64, scopes string) error { return nil } -// EntryMarkReplicated sets replicated_at to now. +// EntryMarkReplicated sets replicated_at to now and clears dirty flag. func EntryMarkReplicated(db *DB, entryID int64) error { now := time.Now().UnixMilli() - _, err := db.Conn.Exec(`UPDATE entries SET replicated_at = ? WHERE entry_id = ?`, now, entryID) + _, err := db.Conn.Exec(`UPDATE entries SET replicated_at = ?, replication_dirty = 0 WHERE entry_id = ?`, now, entryID) return err } -// EntryListUnreplicated returns entries needing replication. +// EntryMarkDirty sets replication_dirty flag. +// Called when entry is modified and needs replication. +func EntryMarkDirty(db *DB, entryID int64) error { + _, err := db.Conn.Exec(`UPDATE entries SET replication_dirty = 1 WHERE entry_id = ?`, entryID) + return err +} + +// EntryListDirty returns entries with replication_dirty = 1 (faster than unreplicated check). +func EntryListDirty(db *DB, limit int) ([]Entry, error) { + rows, err := db.Conn.Query( + `SELECT entry_id, parent_id, type, title, title_idx, data, data_level, scopes, created_at, updated_at, version, deleted_at + FROM entries WHERE replication_dirty = 1 LIMIT ?`, limit) + if err != nil { + return nil, err + } + defer rows.Close() + + var entries []Entry + for rows.Next() { + var e Entry + var deletedAt sql.NullInt64 + if err := rows.Scan(&e.EntryID, &e.ParentID, &e.Type, &e.Title, &e.TitleIdx, &e.Data, &e.DataLevel, &e.Scopes, &e.CreatedAt, &e.UpdatedAt, &e.Version, &deletedAt); err != nil { + return nil, err + } + if deletedAt.Valid { + t := deletedAt.Int64 + e.DeletedAt = &t + } + entries = append(entries, e) + } + return entries, rows.Err() +} + +// EntryListUnreplicated returns entries needing replication (legacy, for initial sync). func EntryListUnreplicated(db *DB) ([]Entry, error) { rows, err := db.Conn.Query( `SELECT entry_id, parent_id, type, title, title_idx, data, data_level, scopes, created_at, updated_at, version, deleted_at diff --git a/clavis/clavis-vault/proxy/bufconn.go b/clavis/clavis-vault/proxy/bufconn.go deleted file mode 100644 index d0b8bf7..0000000 --- a/clavis/clavis-vault/proxy/bufconn.go +++ /dev/null @@ -1,10 +0,0 @@ -package proxy - -import ( - "bufio" - "net" -) - -func newBufReader(conn net.Conn) *bufio.Reader { - return bufio.NewReader(conn) -} diff --git a/clavis/clavis-vault/proxy/proxy.go b/clavis/clavis-vault/proxy/proxy.go deleted file mode 100644 index a94dab8..0000000 --- a/clavis/clavis-vault/proxy/proxy.go +++ /dev/null @@ -1,452 +0,0 @@ -// Package proxy implements an HTTPS MITM proxy with LLM-based policy evaluation. -// -// Architecture: -// - Agent sets HTTP_PROXY=http://localhost:19840 (or configured port) -// - For plain HTTP: proxy injects Authorization/headers, forwards -// - For HTTPS: proxy performs CONNECT tunnel, generates per-host TLS cert (signed by local CA) -// - Before injecting credentials: optional LLM policy evaluation (intent check) -// -// Credential injection: -// - Scans request for placeholder patterns: {{clavitor.entry_title.field_label}} -// - Also injects via per-host credential rules stored in vault -// - Tier check: L2 fields are never injected (identity/card data) -package proxy - -import ( - "crypto/rand" - "crypto/rsa" - "crypto/tls" - "crypto/x509" - "crypto/x509/pkix" - "encoding/pem" - "fmt" - "io" - "log" - "math/big" - "net" - "net/http" - "net/http/httputil" - "os" - "path/filepath" - "strings" - "sync" - "time" -) - -// Config holds proxy configuration. -type Config struct { - // ListenAddr is the proxy listen address, e.g. "127.0.0.1:19840" - ListenAddr string - - // DataDir is the vault data directory (for CA cert/key storage) - DataDir string - - // VaultKey is the L1 decryption key (to read credentials for injection) - VaultKey []byte - - // DBPath is path to the vault SQLite database - DBPath string - - // LLMEnabled enables LLM-based intent evaluation before credential injection - LLMEnabled bool - - // LLMBaseURL is the LLM API base URL (OpenAI-compatible) - LLMBaseURL string - - // LLMAPIKey is the API key for LLM requests - LLMAPIKey string - - // LLMModel is the model to use for policy evaluation - LLMModel string -} - -// Proxy is the MITM proxy server. -type Proxy struct { - cfg Config - ca *tls.Certificate - caCert *x509.Certificate - caKey *rsa.PrivateKey - certMu sync.Mutex - certs map[string]*tls.Certificate // hostname → generated cert (cache) -} - -// New creates a new Proxy. Generates or loads the CA cert from DataDir. -func New(cfg Config) (*Proxy, error) { - p := &Proxy{ - cfg: cfg, - certs: make(map[string]*tls.Certificate), - } - if err := p.loadOrCreateCA(); err != nil { - return nil, fmt.Errorf("proxy CA: %w", err) - } - return p, nil -} - -// ListenAndServe starts the proxy server. Blocks until stopped. -func (p *Proxy) ListenAndServe() error { - ln, err := net.Listen("tcp", p.cfg.ListenAddr) - if err != nil { - return fmt.Errorf("proxy listen %s: %w", p.cfg.ListenAddr, err) - } - log.Printf("proxy: listening on %s (LLM policy: %v)", p.cfg.ListenAddr, p.cfg.LLMEnabled) - srv := &http.Server{ - Handler: p, - ReadTimeout: 30 * time.Second, - WriteTimeout: 30 * time.Second, - } - return srv.Serve(ln) -} - -// ServeHTTP handles all incoming proxy requests. -func (p *Proxy) ServeHTTP(w http.ResponseWriter, r *http.Request) { - if r.Method == http.MethodConnect { - p.handleCONNECT(w, r) - return - } - p.handleHTTP(w, r) -} - -// handleHTTP handles plain HTTP proxy requests. -func (p *Proxy) handleHTTP(w http.ResponseWriter, r *http.Request) { - // Remove proxy-specific headers - r.RequestURI = "" - r.Header.Del("Proxy-Connection") - r.Header.Del("Proxy-Authenticate") - r.Header.Del("Proxy-Authorization") - - // Inject credentials if applicable - if err := p.injectCredentials(r); err != nil { - log.Printf("proxy: credential injection error for %s: %v", r.URL.Host, err) - // Non-fatal: continue without injection - } - - // Forward the request - rp := &httputil.ReverseProxy{ - Director: func(req *http.Request) {}, - } - rp.ServeHTTP(w, r) -} - -// handleCONNECT handles HTTPS CONNECT tunnel requests. -func (p *Proxy) handleCONNECT(w http.ResponseWriter, r *http.Request) { - host := r.Host - if !strings.Contains(host, ":") { - host = host + ":443" - } - hostname, _, _ := net.SplitHostPort(host) - - // Acknowledge the CONNECT - w.WriteHeader(http.StatusOK) - - // Hijack the connection - hijacker, ok := w.(http.Hijacker) - if !ok { - log.Printf("proxy: CONNECT hijack not supported") - return - } - clientConn, _, err := hijacker.Hijack() - if err != nil { - log.Printf("proxy: CONNECT hijack error: %v", err) - return - } - defer clientConn.Close() - - // Generate a certificate for this hostname - cert, err := p.certForHost(hostname) - if err != nil { - log.Printf("proxy: cert generation failed for %s: %v", hostname, err) - return - } - - // Wrap client connection in TLS (using our MITM cert) - tlsCfg := &tls.Config{ - Certificates: []tls.Certificate{*cert}, - MinVersion: tls.VersionTLS12, - } - tlsClientConn := tls.Server(clientConn, tlsCfg) - defer tlsClientConn.Close() - if err := tlsClientConn.Handshake(); err != nil { - log.Printf("proxy: TLS handshake failed for %s: %v", hostname, err) - return - } - - // Connect to real upstream - upstreamConn, err := tls.Dial("tcp", host, &tls.Config{ - ServerName: hostname, - MinVersion: tls.VersionTLS12, - }) - if err != nil { - log.Printf("proxy: upstream dial failed for %s: %v", host, err) - return - } - defer upstreamConn.Close() - - // Intercept HTTP traffic between client and upstream - p.interceptHTTP(tlsClientConn, upstreamConn, hostname) -} - -// interceptHTTP reads HTTP requests from the client, injects credentials, forwards to upstream. -func (p *Proxy) interceptHTTP(clientConn net.Conn, upstreamConn net.Conn, hostname string) { - // Use Go's http.ReadRequest to parse the client's request - clientReader := newBufReader(clientConn) - - for { - req, err := http.ReadRequest(clientReader) - if err != nil { - if err != io.EOF { - log.Printf("proxy: read request error for %s: %v", hostname, err) - } - return - } - - // Set the correct URL for upstream forwarding - req.URL.Scheme = "https" - req.URL.Host = hostname - req.RequestURI = "" - - // Inject credentials - if err := p.injectCredentials(req); err != nil { - log.Printf("proxy: credential injection error for %s: %v", hostname, err) - } - - // Forward to upstream - if err := req.Write(upstreamConn); err != nil { - log.Printf("proxy: upstream write error for %s: %v", hostname, err) - return - } - - // Read upstream response and forward to client - upstreamReader := newBufReader(upstreamConn) - resp, err := http.ReadResponse(upstreamReader, req) - if err != nil { - log.Printf("proxy: upstream read error for %s: %v", hostname, err) - return - } - defer resp.Body.Close() - - if err := resp.Write(clientConn); err != nil { - log.Printf("proxy: client write error for %s: %v", hostname, err) - return - } - } -} - -// injectCredentials scans the request for credential placeholders and injects them. -// Placeholder format: {{clavitor.entry_title.field_label}} in headers, URL, or body. -// Also applies host-based automatic injection rules from vault. -// L2 (identity/card) fields are NEVER injected. -func (p *Proxy) injectCredentials(r *http.Request) error { - if p.cfg.VaultKey == nil { - return nil // No vault key — skip injection - } - - // Check for LLM policy evaluation - if p.cfg.LLMEnabled { - allowed, reason, err := p.evaluatePolicy(r) - if err != nil { - log.Printf("proxy: LLM policy eval error: %v (allowing)", err) - } else if !allowed { - log.Printf("proxy: LLM policy DENIED %s %s: %s", r.Method, r.URL, reason) - return fmt.Errorf("policy denied: %s", reason) - } - } - - // TODO: Implement placeholder substitution once vault DB integration is wired in. - // Pattern: scan r.Header values, r.URL, r.Body for {{clavitor.TITLE.FIELD}} - // Lookup entry by title (case-insensitive), get field by label, verify Tier != L2 - // Replace placeholder with decrypted field value. - // - // Auto-injection (host rules): - // Vault entries can specify "proxy_inject_hosts": ["api.github.com"] in metadata - // When a request matches, inject the entry's L1 fields as headers per a configured map. - // - // This stub returns nil — no injection until DB wiring is complete. - return nil -} - -// evaluatePolicy calls the configured LLM to evaluate whether this request -// is consistent with the expected behavior of an AI agent (vs. exfiltration/abuse). -func (p *Proxy) evaluatePolicy(r *http.Request) (allowed bool, reason string, err error) { - if p.cfg.LLMBaseURL == "" || p.cfg.LLMAPIKey == "" { - return true, "LLM not configured", nil - } - - // Build a concise request summary for the LLM - summary := fmt.Sprintf("Method: %s\nHost: %s\nPath: %s\nContent-Type: %s", - r.Method, r.Host, r.URL.Path, - r.Header.Get("Content-Type")) - - prompt := `You are a security policy evaluator for an AI agent credential proxy. - -The following outbound HTTP request is about to have credentials injected and be forwarded. -Evaluate whether this request is consistent with normal AI agent behavior (coding, API calls, deployment) -vs. suspicious activity (credential exfiltration, unexpected destinations, data harvesting). - -Request summary: -` + summary + ` - -Respond with JSON only: {"allowed": true/false, "reason": "one sentence"}` - - _ = prompt // Used when LLM call is implemented below - - // TODO: Implement actual LLM call using cfg.LLMBaseURL + cfg.LLMAPIKey + cfg.LLMModel - // For now: always allow (policy eval is opt-in, not blocking by default) - // Real implementation: POST to /v1/chat/completions, parse JSON response - return true, "policy evaluation not yet implemented", nil -} - -// certForHost returns a TLS certificate for the given hostname, generating one if needed. -func (p *Proxy) certForHost(hostname string) (*tls.Certificate, error) { - p.certMu.Lock() - defer p.certMu.Unlock() - - if cert, ok := p.certs[hostname]; ok { - // Check if cert is still valid (> 1 hour remaining) - if time.Until(cert.Leaf.NotAfter) > time.Hour { - return cert, nil - } - } - - // Generate a new cert signed by our CA - cert, err := p.generateCert(hostname) - if err != nil { - return nil, err - } - p.certs[hostname] = cert - return cert, nil -} - -// generateCert generates a TLS cert for the given hostname, signed by the proxy CA. -func (p *Proxy) generateCert(hostname string) (*tls.Certificate, error) { - key, err := rsa.GenerateKey(rand.Reader, 2048) - if err != nil { - return nil, fmt.Errorf("generate key: %w", err) - } - - serial, _ := rand.Int(rand.Reader, new(big.Int).Lsh(big.NewInt(1), 128)) - tmpl := &x509.Certificate{ - SerialNumber: serial, - Subject: pkix.Name{CommonName: hostname}, - DNSNames: []string{hostname}, - NotBefore: time.Now().Add(-time.Minute), - NotAfter: time.Now().Add(24 * time.Hour), - KeyUsage: x509.KeyUsageDigitalSignature, - ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth}, - } - - // Add IP SAN if hostname is an IP - if ip := net.ParseIP(hostname); ip != nil { - tmpl.IPAddresses = []net.IP{ip} - tmpl.DNSNames = nil - } - - certDER, err := x509.CreateCertificate(rand.Reader, tmpl, p.caCert, &key.PublicKey, p.caKey) - if err != nil { - return nil, fmt.Errorf("create cert: %w", err) - } - - leaf, err := x509.ParseCertificate(certDER) - if err != nil { - return nil, fmt.Errorf("parse cert: %w", err) - } - - tlsCert := &tls.Certificate{ - Certificate: [][]byte{certDER}, - PrivateKey: key, - Leaf: leaf, - } - return tlsCert, nil -} - -// loadOrCreateCA loads the proxy CA cert/key from DataDir, or generates new ones. -func (p *Proxy) loadOrCreateCA() error { - caDir := filepath.Join(p.cfg.DataDir, "proxy") - if err := os.MkdirAll(caDir, 0700); err != nil { - return err - } - certPath := filepath.Join(caDir, "ca.crt") - keyPath := filepath.Join(caDir, "ca.key") - - // Try to load existing CA - if _, err := os.Stat(certPath); err == nil { - certPEM, err := os.ReadFile(certPath) - if err != nil { - return fmt.Errorf("read CA cert: %w", err) - } - keyPEM, err := os.ReadFile(keyPath) - if err != nil { - return fmt.Errorf("read CA key: %w", err) - } - tlsCert, err := tls.X509KeyPair(certPEM, keyPEM) - if err != nil { - return fmt.Errorf("parse CA keypair: %w", err) - } - tlsCert.Leaf, err = x509.ParseCertificate(tlsCert.Certificate[0]) - if err != nil { - return fmt.Errorf("parse CA cert: %w", err) - } - // Check expiry — regenerate if < 7 days left - if time.Until(tlsCert.Leaf.NotAfter) < 7*24*time.Hour { - log.Printf("proxy: CA cert expires soon (%s), regenerating", tlsCert.Leaf.NotAfter.Format("2006-01-02")) - } else { - p.ca = &tlsCert - p.caCert = tlsCert.Leaf - p.caKey = tlsCert.PrivateKey.(*rsa.PrivateKey) - log.Printf("proxy: loaded CA cert (expires %s)", tlsCert.Leaf.NotAfter.Format("2006-01-02")) - return nil - } - } - - // Generate new CA - log.Printf("proxy: generating new CA cert...") - key, err := rsa.GenerateKey(rand.Reader, 4096) - if err != nil { - return fmt.Errorf("generate CA key: %w", err) - } - - serial, _ := rand.Int(rand.Reader, new(big.Int).Lsh(big.NewInt(1), 128)) - tmpl := &x509.Certificate{ - SerialNumber: serial, - Subject: pkix.Name{CommonName: "Clavitor Proxy CA", Organization: []string{"Clavitor"}}, - NotBefore: time.Now().Add(-time.Minute), - NotAfter: time.Now().Add(365 * 24 * time.Hour), - KeyUsage: x509.KeyUsageCertSign | x509.KeyUsageCRLSign, - BasicConstraintsValid: true, - IsCA: true, - MaxPathLen: 0, - } - - certDER, err := x509.CreateCertificate(rand.Reader, tmpl, tmpl, &key.PublicKey, key) - if err != nil { - return fmt.Errorf("create CA cert: %w", err) - } - leaf, _ := x509.ParseCertificate(certDER) - - // Write to disk - certFile, err := os.OpenFile(certPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0600) - if err != nil { - return fmt.Errorf("write CA cert: %w", err) - } - pem.Encode(certFile, &pem.Block{Type: "CERTIFICATE", Bytes: certDER}) - certFile.Close() - - keyFile, err := os.OpenFile(keyPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0600) - if err != nil { - return fmt.Errorf("write CA key: %w", err) - } - pem.Encode(keyFile, &pem.Block{Type: "RSA PRIVATE KEY", Bytes: x509.MarshalPKCS1PrivateKey(key)}) - keyFile.Close() - - p.ca = &tls.Certificate{Certificate: [][]byte{certDER}, PrivateKey: key, Leaf: leaf} - p.caCert = leaf - p.caKey = key - - log.Printf("proxy: CA cert generated at %s (install in OS trust store or pass --proxy-ca)", certPath) - log.Printf("proxy: CA cert path: %s", certPath) - return nil -} - -// CACertPath returns the path to the proxy CA certificate (for user installation). -func (p *Proxy) CACertPath() string { - return filepath.Join(p.cfg.DataDir, "proxy", "ca.crt") -} diff --git a/clavitor.ai/main.go b/clavitor.ai/main.go index 4ef5f8b..153d6b6 100644 --- a/clavitor.ai/main.go +++ b/clavitor.ai/main.go @@ -155,135 +155,229 @@ func geoHandler(w http.ResponseWriter, r *http.Request) { io.Copy(w, resp.Body) } -// classifyDomain returns a scope category for a domain. -// Uses keyword matching on domain parts. Fast, no external calls. +// classifyDomain asks an LLM to classify a domain into a scope category. +// Called once per unknown domain. Result cached in SQLite forever. func classifyDomain(domain string) string { - d := strings.ToLower(domain) + d := strings.ToLower(strings.TrimSpace(domain)) - // Finance: banks, payments, crypto exchanges, investment - financeKW := []string{"bank", "chase", "citi", "wells", "fargo", "amex", "visa", "mastercard", - "paypal", "venmo", "zelle", "stripe", "square", "plaid", "wise", "revolut", - "coinbase", "binance", "kraken", "gemini", "robinhood", "fidelity", "schwab", - "vanguard", "etrade", "tdameritrade", "mint", "intuit", "turbotax", "irs", - "quickbooks", "freshbooks", "xero", "capital", "discover", "synchrony", - "mbank", "ing", "rabobank", "abn", "sns", "bunq", "n26", "monzo", "starling"} - for _, kw := range financeKW { - if strings.Contains(d, kw) { return "finance" } + // Local IPs — no LLM needed + if strings.HasPrefix(d, "192.168.") || strings.HasPrefix(d, "10.") || + strings.HasPrefix(d, "172.16.") || strings.HasPrefix(d, "172.17.") || + strings.HasPrefix(d, "172.18.") || strings.HasPrefix(d, "172.19.") || + strings.HasPrefix(d, "172.2") || strings.HasPrefix(d, "172.3") || + d == "localhost" || d == "127.0.0.1" { + return "home" } - // Social: social media, messaging, dating - socialKW := []string{"facebook", "instagram", "twitter", "tiktok", "snapchat", "pinterest", - "linkedin", "reddit", "tumblr", "discord", "slack", "telegram", "whatsapp", - "signal", "messenger", "mastodon", "threads", "bluesky", "truth", - "tinder", "bumble", "hinge", "match", "okcupid", "nextdoor", "meetup"} - for _, kw := range socialKW { - if strings.Contains(d, kw) { return "social" } + // Ask LLM + scope := classifyViaLLM(d) + if scope != "" { + return scope } - - // Shopping: e-commerce, retail - shopKW := []string{"amazon", "ebay", "etsy", "walmart", "target", "bestbuy", "costco", - "aliexpress", "alibaba", "shopify", "wayfair", "homedepot", "lowes", - "ikea", "zappos", "nike", "adidas", "macys", "nordstrom", "sephora", - "bol.com", "coolblue", "zalando", "asos", "shein", "temu", "wish"} - for _, kw := range shopKW { - if strings.Contains(d, kw) { return "shopping" } - } - - // Dev: code, cloud, tools - devKW := []string{"github", "gitlab", "bitbucket", "stackoverflow", "npmjs", "pypi", - "docker", "kubernetes", "aws", "azure", "gcloud", "digitalocean", "heroku", - "vercel", "netlify", "cloudflare", "sentry", "datadog", "grafana", - "jira", "atlassian", "confluence", "jetbrains", "vscode", "terraform", - "jenkins", "circleci", "travisci", "codecov", "snyk", "sonar", - "openai", "anthropic", "huggingface", "replicate", "railway", - "postman", "swagger", "figma", "canva", "miro", "notion", - "tailscale", "wireguard", "openvpn", "pihole", "unifi", "ubiquiti", - "synology", "qnap", "proxmox", "portainer", "pikapod"} - for _, kw := range devKW { - if strings.Contains(d, kw) { return "dev" } - } - - // Work: corporate, HR, business - workKW := []string{"salesforce", "hubspot", "zendesk", "servicenow", "workday", - "adp", "gusto", "rippling", "bamboohr", "greenhouse", "lever", - "docusign", "hellosign", "zoom", "webex", "teams", "office", - "google.com/a", "workspace", "dropbox", "box.com", - "monday.com", "asana", "basecamp", "clickup", "trello", - "solarwinds", "datto", "kaseya", "connectwise", "autotask", - "openprovider", "namecheap", "godaddy", "cloudflare"} - for _, kw := range workKW { - if strings.Contains(d, kw) { return "work" } - } - - // Email - emailKW := []string{"gmail", "outlook", "hotmail", "yahoo", "protonmail", "proton.me", - "icloud", "zoho", "fastmail", "tutanota", "hey.com", "mail."} - for _, kw := range emailKW { - if strings.Contains(d, kw) { return "email" } - } - - // Media: streaming, news, entertainment - mediaKW := []string{"netflix", "hulu", "disney", "hbo", "spotify", "apple.com/tv", - "youtube", "twitch", "plex", "jellyfin", "emby", "audible", - "kindle", "nytimes", "washingtonpost", "bbc", "cnn", "reuters", - "crunchyroll", "paramount", "peacock", "dazn", "espn", - "steam", "epic", "playstation", "xbox", "nintendo"} - for _, kw := range mediaKW { - if strings.Contains(d, kw) { return "media" } - } - - // Health: medical, pharmacy, insurance - healthKW := []string{"mychart", "epic", "cerner", "healthgrades", "zocdoc", - "cvs", "walgreens", "express-scripts", "optum", "cigna", - "unitedhealth", "anthem", "aetna", "kaiser", "bluecross", - "mdlive", "teladoc", "onemedical", "goodrx", "medline", - "hopkinsmedicine", "mayoclinic", "clevelandclinic"} - for _, kw := range healthKW { - if strings.Contains(d, kw) { return "health" } - } - - // Travel: airlines, hotels, rental - travelKW := []string{"airline", "united.com", "delta.com", "american", "southwest", - "jetblue", "spirit", "frontier", "ryanair", "easyjet", "lufthansa", - "klm", "airfrance", "british-airways", "emirates", "qatar", - "marriott", "hilton", "hyatt", "ihg", "airbnb", "booking.com", - "expedia", "kayak", "tripadvisor", "vrbo", "hertz", "avis", - "enterprise", "turo", "uber", "lyft", "grab", "flightradar", - "seatguru", "flightaware", "wagoneer", "zipcar"} - for _, kw := range travelKW { - if strings.Contains(d, kw) { return "travel" } - } - - // Education - eduKW := []string{".edu", "coursera", "udemy", "edx", "khanacademy", - "duolingo", "brilliant", "skillshare", "lynda", "pluralsight", - "codecademy", "udacity", "mit.edu", "stanford", "harvard", - "canvas", "blackboard", "schoology", "veracross", "powerschool"} - for _, kw := range eduKW { - if strings.Contains(d, kw) { return "education" } - } - - // Government - govKW := []string{".gov", "irs.gov", "ssa.gov", "dmv", "uscis", "state.gov", - "healthcare.gov", "login.gov", "id.me", "digid", "mijnoverheid", - "belastingdienst", "rijksoverheid"} - for _, kw := range govKW { - if strings.Contains(d, kw) { return "government" } - } - - // Home: IoT, ISP, local - homeKW := []string{"nest", "ring", "simplisafe", "adt", "ecobee", "myq", - "philips-hue", "sonos", "arlo", "wyze", "blink", "eufy", - "comcast", "xfinity", "spectrum", "att.com", "verizon", "tmobile", - "cox", "frontier", "centurylink", "starlink", "mylio", - "quooker", "hue", "homekit"} - for _, kw := range homeKW { - if strings.Contains(d, kw) { return "home" } - } - return "misc" } +// classifyBatch classifies all domains in one LLM call. +// Returns map[domain]scope. Domains that fail get "misc". +func classifyBatch(domains []string) map[string]string { + result := make(map[string]string) + + // Handle local IPs without LLM + var toLLM []string + for _, d := range domains { + if strings.HasPrefix(d, "192.168.") || strings.HasPrefix(d, "10.") || + strings.HasPrefix(d, "172.") || d == "localhost" || d == "127.0.0.1" { + result[d] = "home" + } else { + toLLM = append(toLLM, d) + } + } + if len(toLLM) == 0 { + return result + } + + // Build domain list for prompt + domainList := strings.Join(toLLM, "\n") + + prompt := fmt.Sprintf( + `Classify each domain into exactly ONE category. Reply with ONLY lines in format: domain=category +No explanations. One line per domain. Pick the single MOST specific category. + +Categories: finance, social, shopping, work, dev, email, media, health, travel, home, education, government + +Domains: +%s`, domainList) + + reqBody, _ := json.Marshal(map[string]any{ + "model": "anthropic/claude-haiku-4-5", + "messages": []map[string]string{ + {"role": "system", "content": "You classify internet domains. Reply ONLY with lines in format: domain=category. No explanations, no reasoning, no markdown."}, + {"role": "user", "content": prompt}, + }, + "max_tokens": 4096, + "temperature": 0, + }) + + apiKey := os.Getenv("OPENROUTER_API_KEY") + if apiKey == "" { + log.Printf("classifyBatch: OPENROUTER_API_KEY not set") + return result + } + + req, err := http.NewRequest("POST", "https://openrouter.ai/api/v1/chat/completions", strings.NewReader(string(reqBody))) + if err != nil { + return result + } + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Authorization", "Bearer "+apiKey) + + client := &http.Client{Timeout: 120 * time.Second} + resp, err := client.Do(req) + if err != nil { + log.Printf("classifyBatch: LLM call error: %v", err) + return result + } + defer resp.Body.Close() + + body, _ := io.ReadAll(resp.Body) + var llmResult struct { + Choices []struct { + Message struct { + Content string `json:"content"` + } `json:"message"` + } `json:"choices"` + } + if err := json.Unmarshal(body, &llmResult); err != nil || len(llmResult.Choices) == 0 { + snippet := string(body) + if len(snippet) > 300 { snippet = snippet[:300] } + log.Printf("classifyBatch: LLM error (status=%d): %s", resp.StatusCode, snippet) + return result + } + + // Parse response lines: "domain=scope1,scope2" + content := llmResult.Choices[0].Message.Content + log.Printf("classifyBatch: raw LLM response (%d chars): %s", len(content), content) + // Strip thinking tags + if idx := strings.Index(content, ""); idx >= 0 { + content = content[idx+8:] + } + for _, line := range strings.Split(content, "\n") { + line = strings.TrimSpace(line) + // Try multiple separators: =, :, - + var domain, scopes string + for _, sep := range []string{"=", ": ", " - "} { + parts := strings.SplitN(line, sep, 2) + if len(parts) == 2 { + domain = strings.TrimSpace(parts[0]) + scopes = strings.TrimSpace(parts[1]) + break + } + } + if domain == "" || scopes == "" { + continue + } + // Strip backticks, quotes, markdown + domain = strings.Trim(domain, "`\"'*- ") + scopes = strings.Trim(scopes, "`\"'*- ") + // Validate each scope + var valid []string + for _, s := range strings.Split(scopes, ",") { + s = strings.TrimSpace(s) + if validScopes[s] { + valid = append(valid, s) + } + } + if len(valid) > 0 { + result[domain] = strings.Join(valid, ",") + } + } + + log.Printf("classifyBatch: classified %d/%d domains", len(result), len(domains)) + return result +} + +var validScopes = map[string]bool{ + "finance": true, "social": true, "shopping": true, "work": true, + "dev": true, "email": true, "media": true, "health": true, + "travel": true, "home": true, "education": true, "government": true, + "misc": true, +} + +func classifyViaLLM(domain string) string { + prompt := fmt.Sprintf( + `Classify this internet domain into one or more categories. Reply with ONLY the category names, comma-separated, nothing else. Pick the most specific ones. Use "misc" only if nothing else fits. + +Categories: finance, social, shopping, work, dev, email, media, health, travel, home, education, government + +Domain: %s`, domain) + + reqBody, _ := json.Marshal(map[string]any{ + "model": "anthropic/claude-haiku-4-5", + "messages": []map[string]string{ + {"role": "user", "content": prompt}, + }, + "max_tokens": 200, + "temperature": 0, + }) + + req, err := http.NewRequest("POST", "https://openrouter.ai/api/v1/chat/completions", strings.NewReader(string(reqBody))) + if err != nil { + log.Printf("classify LLM req error: %v", err) + return "" + } + req.Header.Set("Content-Type", "application/json") + apiKey := os.Getenv("OPENROUTER_API_KEY") + if apiKey == "" { + log.Printf("classify: OPENROUTER_API_KEY not set") + return "" + } + req.Header.Set("Authorization", "Bearer "+apiKey) + + client := &http.Client{Timeout: 30 * time.Second} + resp, err := client.Do(req) + if err != nil { + log.Printf("classify LLM call error: %v", err) + return "" + } + defer resp.Body.Close() + + body, _ := io.ReadAll(resp.Body) + var result struct { + Choices []struct { + Message struct { + Content string `json:"content"` + } `json:"message"` + } `json:"choices"` + } + if err := json.Unmarshal(body, &result); err != nil || len(result.Choices) == 0 { + snippet := string(body) + if len(snippet) > 500 { snippet = snippet[:500] } + log.Printf("classify LLM error (status=%d): %s", resp.StatusCode, snippet) + return "" + } + + raw := strings.ToLower(strings.TrimSpace(result.Choices[0].Message.Content)) + // Strip thinking tags + if idx := strings.Index(raw, ""); idx >= 0 { + raw = strings.TrimSpace(raw[idx+8:]) + } + // Validate each scope + var valid []string + for _, s := range strings.Split(raw, ",") { + s = strings.TrimSpace(s) + if validScopes[s] { + valid = append(valid, s) + } + } + if len(valid) == 0 { + log.Printf("classify: %s → invalid response %q, defaulting to misc", domain, raw) + return "misc" + } + scope := strings.Join(valid, ",") + log.Printf("classify: %s → %s", domain, scope) + return scope +} + func main() { if _, err := os.Stat("templates"); err == nil { devMode = true @@ -353,13 +447,29 @@ func main() { } } - // Classify uncached domains using built-in rules - for _, d := range uncached { - scope := classifyDomain(d) - result[d] = scope - // Cache it - db.Exec("INSERT OR REPLACE INTO domain_scopes (domain, scope, created_at) VALUES (?, ?, ?)", - d, scope, time.Now().Unix()) + // Classify uncached domains in chunks (max 200 per LLM call to stay within output token limits) + if len(uncached) > 0 { + chunkSize := 200 + for i := 0; i < len(uncached); i += chunkSize { + end := i + chunkSize + if end > len(uncached) { + end = len(uncached) + } + chunk := uncached[i:end] + batch := classifyBatch(chunk) + for _, d := range chunk { + scope := batch[d] + if scope == "" || scope == "misc" { + result[d] = "misc" + } else { + result[d] = scope + db.Exec("INSERT OR REPLACE INTO domain_scopes (domain, scope, created_at) VALUES (?, ?, ?)", + d, scope, time.Now().Unix()) + } + } + } + log.Printf("classify: %d uncached domains in %d chunks, %d cached total", + len(uncached), (len(uncached)+chunkSize-1)/chunkSize, len(result)) } w.Header().Set("Content-Type", "application/json") @@ -451,6 +561,9 @@ func main() { http.HandleFunc("/upgrade", func(w http.ResponseWriter, r *http.Request) { render(w, PageData{Page: "upgrade", Title: "Upgrade to Clavitor — clavitor", ActiveNav: "upgrade"}) }) + http.HandleFunc("/developers", func(w http.ResponseWriter, r *http.Request) { + render(w, PageData{Page: "developers", Title: "Developers — clavitor", ActiveNav: "developers"}) + }) http.HandleFunc("/integrations/claude-code", func(w http.ResponseWriter, r *http.Request) { render(w, PageData{Page: "claude-code", Title: "Clavitor + Claude Code — Secure credential access", ActiveNav: "integrations"}) }) diff --git a/clavitor.com/.DS_Store b/clavitor.com/.DS_Store deleted file mode 100644 index 3a51048..0000000 Binary files a/clavitor.com/.DS_Store and /dev/null differ diff --git a/clavitor.com/CLAUDE.md b/clavitor.com/CLAUDE.md deleted file mode 100644 index e201665..0000000 --- a/clavitor.com/CLAUDE.md +++ /dev/null @@ -1,106 +0,0 @@ -# Clavitor Website — clavitor.com - -## Foundation First — No Mediocrity. Ever. -The rule is simple: do it right, or say something. - -Johan is an architect. Architects do not patch cracks in a bad foundation — they rebuild. Every agent on this team operates the same way. - -**What this means in practice:** -- If you need three fixes for one problem, stop. Something fundamental is wrong. Name it, surface it — we fix that, not the symptom. -- If the code is spaghetti, say so. Do not add another workaround. The workaround is the problem now. -- Quick fixes are not fixes. A "temporary" hack that ships is permanent. If it is not the right solution, it is the wrong solution. -- Foundation > speed. A solid base makes everything downstream easy. A shaky base makes everything downstream a nightmare. We build bases. - -**The restart rule:** When the foundation is wrong: start over. Not "refactor slightly." Not "add an abstraction layer on top." Start over. This applies to code, infrastructure, design, encryption schemes, and written work alike. - -**Q&D is research, not output:** Exploratory/throwaway work has its place — but it stays in research. Nothing Q&D ships. Nothing Q&D becomes the production path. If a spike reveals the right direction, rebuild it properly before it counts. - -**When you hit a bad foundation:** Call it out. Do not work around it. Bad foundations are not your fault — but silently building on them is. Surface the problem, we work on it together. - -The bar is high. The support is real. These rules apply to you now. - -## Architecture -- Go web server (`main.go`) with `go:embed` for templates, CSS, SVGs, PNGs -- Templates in `templates/*.tmpl`, single CSS in `clavitor.css` -- SQLite DB: `clavitor.db` (pops, telemetry, uptime, incidents, accounts, vaults, sessions) -- Dev mode: auto-detected when `templates/` dir exists on disk — reloads templates per request, but CSS/SVGs require rebuild (`go:embed`) -- Port 8099 -- License: Elastic License 2.0 (NOT MIT) - -## Deployment - -### Dev (forge = 192.168.1.16, Florida — dev.clavitor.ai) -``` -make dev # build + restart locally -make deploy-dev # same thing -``` -Dev runs on forge (localhost). `dev.clavitor.ai` DNS points to home IP. - -### Prod (Zürich — clavitor.ai — clavitor.ai) -``` -make deploy-prod # cross-compile amd64, scp to Zürich, restart systemd -``` -Prod runs at `/opt/clavitor-web/` as systemd service `clavitor-web`. -Caddy reverse proxies `clavitor.ai`, `clavitor.com`, `www.clavitor.ai`, `www.clavitor.com` → `localhost:8099`. - -### First-time setup (already done) -``` -make setup-prod # creates /opt/clavitor-web, systemd service, uploads binary+db -``` -Then manually update `/etc/caddy/Caddyfile` to reverse_proxy. - -### SSH -- Prod: `ssh root@clavitor.ai` -- Tailscale: `zurich` (100.70.148.118) — SSH may be blocked via Tailscale - -## Build & Run -``` -CGO_ENABLED=1 go build -o clavitor-web . -./clavitor-web -``` -CSS and SVG changes require rebuild (embedded at compile time). Template changes reload in dev mode. - -## Brand & Design -- Light mode only. Single source of truth: `clavitor.css` -- Logo: the black square (`#0A0A0A`). favicon.svg = black square -- Colors: black `#0A0A0A` (brand), red `#DC2626` (accent), light red `#F5B7B7` (planned/secondary), grayscale -- No purple. No green (except inherited SVG diagrams). Red is the only accent. -- Square shapes for permanent UI elements. Circles only for transient animations (pulses, "You" dot) -- Fonts: Figtree (body), JetBrains Mono (code/monospace) -- No inline styles, no CSS in templates. Everything in clavitor.css. -- Always capitalize "Clavitor" in prose. Lowercase in code/paths/commands. - -## Encryption Terminology -- **Vault Encryption** — whole vault at rest -- **Credential Encryption** — per-field, server-side (AI agents can read via CLI) -- **Identity Encryption** — per-field, client-side via WebAuthn PRF (Touch ID only, server cannot decrypt) -- Never use "sealed fields", "agent fields", "L1", "L2", "L3" -- Agents use CLI, NOT MCP (MCP exposes plaintext; CLI is scoped) - -## POPs (Points of Presence) -- Stored in `pops` table in clavitor.db — the single source of truth -- Map on /hosted is generated dynamically from DB via JavaScript -- Zürich = HQ, black dot, larger (11×11). Live POPs = red. Planned = light red. -- "You" visitor dot = circle (not square — "you" is not clavitor) - -## Key URLs -- `/hosted` — hosted product page with dynamic world map -- `/glass` — looking glass (latency from user's browser) -- `/noc?pin=250365` — NOC dashboard (telemetry, read-only, hardcoded PIN) -- `/telemetry` — POST endpoint for POP agent heartbeats (no auth) -- `/ping` — server-side TCP ping (for diagnostics) - -## Vault Binary -- Source: `~/dev/clavitor/clovis/clovis-vault/` -- Build for ARM64: `cd ~/dev/clavitor/clovis/clovis-vault && GOOS=linux GOARCH=arm64 CGO_ENABLED=0 go build -o clavitor-linux-arm64 ./cmd/clavitor` -- All POPs are ARM64 (AWS t4g.micro) -- Vault runs on port 1984 with TLS -- Has `/ping` endpoint (11 bytes, no DB, CORS via middleware) for looking glass -- Has `/health` endpoint (heavier, queries DB) - -## Providers -- AWS: most POPs (free tier t4g.micro) -- LightNode: Santiago, Bogotá, Manila, Dhaka -- ishosting: Istanbul, Almaty -- HostAfrica: Lagos, Nairobi -- Rackmill: Perth diff --git a/clavitor.com/Makefile b/clavitor.com/Makefile deleted file mode 100644 index 3233a1d..0000000 --- a/clavitor.com/Makefile +++ /dev/null @@ -1,79 +0,0 @@ -BINARY = clavitor-web -PROD_HOST = root@clavitor.ai -PROD_DIR = /opt/clavitor-web -PORT = 8099 - -.PHONY: build dev deploy-dev deploy-prod setup-prod - -# Build for local (dev on forge/localhost) -build: - CGO_ENABLED=1 go build -o $(BINARY) . - -# Run locally (dev mode — templates reload from disk) -dev: build - pkill -f ./$(BINARY) 2>/dev/null || true - sleep 0.5 - ./$(BINARY) & - @echo "→ dev.clavitor.ai / http://localhost:$(PORT)" - -# Deploy to dev (forge = localhost, dev.clavitor.ai points here) -deploy-dev: build - pkill -f ./$(BINARY) 2>/dev/null || true - sleep 0.5 - ./$(BINARY) & - @echo "✓ dev deployed → dev.clavitor.ai / http://localhost:$(PORT)" - -# Build for prod (linux/amd64 for Zürich) -build-prod: - GOOS=linux GOARCH=amd64 CGO_ENABLED=1 go build -o $(BINARY)-linux-amd64 . - -# Deploy to prod (Zürich — clavitor.ai) -SSH_MUX = -o ControlMaster=auto -o ControlPath=/tmp/clavitor-deploy-%r@%h -o ControlPersist=30 - -deploy-prod: build-prod - @echo "$$(date +%H:%M:%S) upload..." - scp $(SSH_MUX) $(BINARY)-linux-amd64 $(PROD_HOST):$(PROD_DIR)/ - @echo "$$(date +%H:%M:%S) maintenance on + restart + wait + maintenance off..." - ssh $(SSH_MUX) $(PROD_HOST) "\ - sqlite3 $(PROD_DIR)/clavitor.db \"INSERT INTO maintenance (start_at, reason, started_by, ended_by) VALUES (strftime('%s','now'), 'deploy', 'makefile', '')\" && \ - cd $(PROD_DIR) && mv $(BINARY)-linux-amd64 $(BINARY) && systemctl restart clavitor-web && \ - sleep 5 && \ - sqlite3 $(PROD_DIR)/clavitor.db \"UPDATE maintenance SET end_at = strftime('%s','now'), ended_by = 'makefile' WHERE end_at IS NULL\"" - @echo "$$(date +%H:%M:%S) ✓ prod deployed → https://clavitor.ai" - -# Pull prod DB backup locally -backup-prod: - scp $(PROD_HOST):$(PROD_DIR)/clavitor.db backups/clavitor-$(shell date +%Y%m%d-%H%M%S).db - @echo "✓ prod DB backed up" - -# One-time: push local DB to prod (DESTRUCTIVE — overwrites prod data) -push-db: - @echo "⚠ This overwrites the prod database. Press Ctrl+C to cancel." - @sleep 3 - scp clavitor.db $(PROD_HOST):$(PROD_DIR)/ - ssh $(PROD_HOST) "systemctl restart clavitor-web" - @echo "✓ DB pushed to prod" - -# First-time prod setup (already done) -setup-prod: - ssh $(PROD_HOST) "mkdir -p $(PROD_DIR)" - scp $(BINARY)-linux-amd64 clavitor.db $(PROD_HOST):$(PROD_DIR)/ - ssh $(PROD_HOST) "mv $(PROD_DIR)/$(BINARY)-linux-amd64 $(PROD_DIR)/$(BINARY)" - ssh $(PROD_HOST) 'cat > /etc/systemd/system/clavitor-web.service << EOF\n\ -[Unit]\n\ -Description=clavitor-web\n\ -After=network.target\n\ -\n\ -[Service]\n\ -Type=simple\n\ -WorkingDirectory=$(PROD_DIR)\n\ -ExecStart=$(PROD_DIR)/$(BINARY)\n\ -Restart=always\n\ -RestartSec=5\n\ -Environment=PORT=$(PORT)\n\ -\n\ -[Install]\n\ -WantedBy=multi-user.target\n\ -EOF' - ssh $(PROD_HOST) "systemctl daemon-reload && systemctl enable --now clavitor-web" - @echo "✓ prod setup complete" diff --git a/clavitor.com/README.md b/clavitor.com/README.md deleted file mode 100644 index 6d81f3a..0000000 --- a/clavitor.com/README.md +++ /dev/null @@ -1,17 +0,0 @@ - -## Styleguide - -**clavitor.css** is the single global stylesheet for all clavitor web surfaces — marketing site (`clavitor-web`) and the app UI (`clavitor`). - -- Live: https://clavitor.com/styleguide.html -- Source: `clavitor-web/clavitor.css` - -### Rules (no exceptions) -1. **One stylesheet.** `clavitor.css` only. No Tailwind, no inline styles, no ` - -{{end}} diff --git a/clavitor.com/templates/styleguide.tmpl b/clavitor.com/templates/styleguide.tmpl deleted file mode 100644 index 5d0af56..0000000 --- a/clavitor.com/templates/styleguide.tmpl +++ /dev/null @@ -1,189 +0,0 @@ -{{define "styleguide-head"}} - -{{end}} - -{{define "styleguide"}} -
-
-

Design System

-

Clavitor Styleguide

-

Single source of truth. One stylesheet: clavitor.css. No inline styles in HTML.

-
- - -
-

Colors

-
-
-

Backgrounds

-
--bg
#0A1628
-
--surface
#0d1627
-
--surface-alt
#0a1a0a
-
--surface-gold
#2a1f00
-
-
-

Text

-
--text
#f1f5f9
-
--muted
#94a3b8
-
--subtle
#64748b
-
-
-

Accent

-
--accent
#22C55E
-
--gold
#D4AF37
-
--red
#EF4444
-
-
-
- - -
-

Typography

-

h1 — Your vault. Wherever you want it.

-

h2 — Identity Encryption: jurisdiction irrelevant.

-

h3 — Only you. Only in person.

-

p.lead — We run it. You own it. Pick your region — your data stays there.

-

p — Passwords and private notes are encrypted on your device with a key derived from your WebAuthn authenticator — fingerprint, face, or hardware key. We store a locked box. No key ever reaches our servers.

-

label (default)

-

label.accent

-

label.gold

-

label.red

-

clavitor — vaultname in body text

-
- - -
-

Cards

-
-
-

card (default)

-

Default surface

-

Use for neutral content. Background is --surface.

-
-
-

card.alt

-

Credential fields

-

Green-tinted surface. Use for credential layer content.

-
-
-

card.gold

-

Zürich, Switzerland

-

Gold-tinted surface. Use exclusively for Zürich/HQ.

-
-
-
-
-

card.red

-

Self-hosted

-

Red-tinted surface. Use for self-hosted / warning contexts.

-
-
-

card + card-hover

-

Hover state

-

Hover this card — lifts on hover. Add to any clickable card.

-
-
-
- - -
-

Buttons

- -

btn-row — flex wrap container for button groups

- -
- - -
-

Badges

-
- badge.accent - badge.gold - badge.red - badge.recommended -
-
- - -
-

Hero split

-

hero-split — two-column hero with text left, visual right. Use on .container instead of .hero.

-
-
-

Left column

-

Text, heading, lead paragraph, btn-row.

-
-
-

Right column

-

SVG diagram or visual. Vertically centered via align-items.

-
-
-
- - -
-

Code blocks

-
-

Terminal

-
# comment
-
$ clavitor
-
-
-

JSON — code-block pre resets margin and sets --muted

-
{
-  "mcpServers": {
-    "clavitor": {
-      "url": "http://localhost:1984/mcp",
-      "headers": { "Authorization": "Bearer token_here" }
-    }
-  }
-}
-
-
- - -
-

Grid

-

grid-2

-
-

col 1

Always 1fr.

-

col 2

Always 1fr.

-
-

grid-3

-
-

col 1

Always 1fr.

-

col 2

Always 1fr.

-

col 3

Always 1fr.

-
-
- - -
-

Spacing scale

-

All spacing via utility classes: .mt-2 .mt-3 .mt-4 .mt-6 .mt-8 .mt-12 and matching .mb-*.

-

Container: max-width 1100px, padding 2rem each side. Used everywhere.

-

Section: padding 4rem top/bottom. Separated by hr.divider.

-
- -
-

End of styleguide

-
-
-{{end}} diff --git a/clavitor.com/templates/terms.tmpl b/clavitor.com/templates/terms.tmpl deleted file mode 100644 index 9210a83..0000000 --- a/clavitor.com/templates/terms.tmpl +++ /dev/null @@ -1,53 +0,0 @@ -{{define "terms"}} -
-

Legal

-

Terms of Service

-

Last updated: February 2026

-
- -
- -
-
- -

1. Acceptance

-

By using Clavitor (the "Service"), you agree to these terms. If you don't agree, don't use the Service.

- -

2. Description

-

Clavitor is a password manager with three-tier encryption: Vault Encryption (at rest), Credential Encryption (per-field, server-side), and Identity Encryption (per-field, client-side). The hosted service stores encrypted vault data on your behalf. The self-hosted version (Elastic License 2.0) runs entirely on your own infrastructure.

- -

3. Accounts

-

You are responsible for maintaining the security of your account credentials and authenticator device. We cannot recover Identity fields if you lose access to your WebAuthn authenticator — the mathematical design prevents it.

- -

4. Acceptable use

-

You may not use the Service to store illegal content, conduct attacks, or violate applicable law. We reserve the right to suspend accounts that violate these terms.

- -

5. Payment

-

Hosted service is billed annually at $20/year (promotional pricing may apply). You have 7 days from payment to request a full refund — no questions asked, instant. After 7 days, no refunds are issued.

- -

6. Data ownership

-

Your vault data is yours. We claim no rights to it. You can export or delete it at any time.

- -

7. Service availability

-

We aim for high availability but make no uptime guarantees. Scheduled maintenance will be announced in advance. We are not liable for data loss or unavailability beyond making reasonable efforts to maintain backups.

- -

8. Encryption limitations

-

Credential fields (server-encrypted) provide strong encryption at rest and in transit. Identity fields (client-encrypted) provide an additional layer that even we cannot break. However, no system is perfectly secure. You use the Service at your own risk.

- -

9. Termination

-

You may delete your account at any time. We may suspend accounts that violate these terms. Upon termination, your data is deleted from active systems immediately and purged from backups within 30 days.

- -

10. Limitation of liability

-

The Service is provided "as is." To the maximum extent permitted by applicable law, we are not liable for indirect, incidental, or consequential damages arising from your use of the Service.

- -

11. Governing law

-

These terms are governed by the laws of Switzerland. Disputes will be resolved in the courts of Zürich, Switzerland.

- -

12. Changes

-

We'll notify users by email before making material changes to these terms.

- -

Contact

-

Questions? Email legal@clavitor.com.

-
-
-{{end}} diff --git a/clavitor.com/worldmap.svg b/clavitor.com/worldmap.svg deleted file mode 100644 index 498dfb6..0000000 --- a/clavitor.com/worldmap.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - - \ No newline at end of file diff --git a/operations/pop-sync/pop-sync b/operations/pop-sync/pop-sync index ccf5176..9c7eae5 100755 Binary files a/operations/pop-sync/pop-sync and b/operations/pop-sync/pop-sync differ