package lib import ( "bytes" "context" "encoding/json" "fmt" "log" "net/http" "os" "sync" "time" ) // --------------------------------------------------------------------------- // Severity Levels // --------------------------------------------------------------------------- type Severity int const ( SeverityDebug Severity = iota // Development/debug only SeverityInfo // Informational, no action needed SeverityWarning // Anomaly detected, monitoring required SeverityError // User-impacting issue SeverityCritical // System down / security breach ) func (s Severity) String() string { switch s { case SeverityDebug: return "debug" case SeverityInfo: return "info" case SeverityWarning: return "warning" case SeverityError: return "error" case SeverityCritical: return "critical" default: return "unknown" } } // --------------------------------------------------------------------------- // Error/Event Definition Registry // --------------------------------------------------------------------------- // EventDef defines a class of events in the registry. // This is the single source of truth for all errors and warnings. type EventDef struct { Code string // Unique code: ERR-12345 or WRN-12345 Category string // auth, database, network, security, etc. Severity Severity // How serious Message string // User-facing message (for errors shown to users) Description string // Internal description for operators AutoResolve bool // True if system can auto-resolve (e.g., retry succeeded) CreateTicket bool // True if this creates a trackable incident } // EventRegistry is the canonical list of all events. // Errors = user-impacting and create tickets. // Warnings = anomalies that may indicate future issues. 
var EventRegistry = map[string]EventDef{ // WARNINGS (WRN-1xxxx) - Anomalies, not yet user-impacting "WRN-10001": { Code: "WRN-10001", Category: "performance", Severity: SeverityWarning, Description: "Response time >500ms for credential lookup", AutoResolve: true, CreateTicket: false, // Log only, no ticket unless sustained }, "WRN-10002": { Code: "WRN-10002", Category: "security", Severity: SeverityWarning, Description: "Agent rate limit at 80% of threshold", AutoResolve: true, CreateTicket: false, }, "WRN-10003": { Code: "WRN-10003", Category: "storage", Severity: SeverityWarning, Description: "Disk usage >80% on vault storage", AutoResolve: false, CreateTicket: true, // Creates ticket for ops to expand storage }, "WRN-10004": { Code: "WRN-10004", Category: "network", Severity: SeverityWarning, Description: "TLS handshake latency elevated", AutoResolve: true, CreateTicket: false, }, // AUTH ERRORS (ERR-1xxxx) "ERR-10001": { Code: "ERR-10001", Category: "auth", Severity: SeverityError, Message: "Authentication failed. Please check your credentials and try again.", Description: "WebAuthn challenge verification failed", AutoResolve: false, CreateTicket: false, // User error, not system error }, "ERR-10002": { Code: "ERR-10002", Category: "auth", Severity: SeverityError, Message: "Access denied. You don't have permission for this operation.", Description: "Actor attempted operation outside their scope", AutoResolve: false, CreateTicket: false, }, "ERR-10003": { Code: "ERR-10003", Category: "auth", Severity: SeverityError, Message: "Invalid agent token. The agent may need to be re-enrolled.", Description: "CVT token validation failed", AutoResolve: false, CreateTicket: true, }, "ERR-10004": { Code: "ERR-10004", Category: "security", Severity: SeverityCritical, Message: "Agent locked due to suspicious activity. 
Contact the vault owner.", Description: "Agent triggered harvester defenses (two-strike lockdown)", AutoResolve: false, CreateTicket: true, }, "ERR-10005": { Code: "ERR-10005", Category: "security", Severity: SeverityCritical, Message: "Request from unauthorized IP. Token may be compromised.", Description: "Agent request from IP not in whitelist", AutoResolve: false, CreateTicket: true, }, // INPUT ERRORS (ERR-2xxxx) "ERR-20001": { Code: "ERR-20001", Category: "input", Severity: SeverityError, Message: "Invalid request. Please check your input and try again.", Description: "JSON parse failed or required field missing", AutoResolve: false, CreateTicket: false, }, "ERR-20002": { Code: "ERR-20002", Category: "input", Severity: SeverityError, Message: "Invalid ID format. The requested item may not exist.", Description: "Entry/agent ID parsing failed", AutoResolve: false, CreateTicket: false, }, // NOT FOUND ERRORS (ERR-3xxxx) "ERR-30001": { Code: "ERR-30001", Category: "not_found", Severity: SeverityError, Message: "Vault not found. Please register or check your vault path.", Description: "No vault exists at the requested path", AutoResolve: false, CreateTicket: false, }, "ERR-30002": { Code: "ERR-30002", Category: "not_found", Severity: SeverityError, Message: "Entry not found. It may have been deleted.", Description: "Requested entry ID does not exist", AutoResolve: false, CreateTicket: false, }, // SYSTEM ERRORS (ERR-5xxxx) - These POST to central "ERR-50001": { Code: "ERR-50001", Category: "database", Severity: SeverityCritical, Message: "Service temporarily unavailable. Our team has been alerted and is working on it. Reference: ERR-50001.", Description: "Vault database connection failed or file inaccessible", AutoResolve: false, CreateTicket: true, }, "ERR-50002": { Code: "ERR-50002", Category: "database", Severity: SeverityError, Message: "Failed to save data. 
Please try again in a moment.", Description: "Database write operation failed", AutoResolve: true, CreateTicket: true, }, "ERR-50003": { Code: "ERR-50003", Category: "storage", Severity: SeverityCritical, Message: "Service temporarily unavailable. Our team has been alerted. Reference: ERR-50003.", Description: "WL3 credential storage write failed", AutoResolve: false, CreateTicket: true, }, "ERR-50004": { Code: "ERR-50004", Category: "network", Severity: SeverityError, Message: "Connection issue. Please try again.", Description: "Failed to connect to central admin for sync", AutoResolve: true, CreateTicket: true, }, // RATE LIMITING (ERR-6xxxx) "ERR-60001": { Code: "ERR-60001", Category: "rate_limit", Severity: SeverityError, Message: "Too many requests. Please slow down and try again in a moment.", Description: "Global per-IP rate limit exceeded", AutoResolve: true, CreateTicket: false, }, "ERR-60002": { Code: "ERR-60002", Category: "rate_limit", Severity: SeverityWarning, Message: "Agent rate limit warning. Reduce request frequency.", Description: "Per-agent unique-entry quota at 90%", AutoResolve: true, CreateTicket: false, }, // INTERNAL ERRORS (ERR-9xxxx) - Invariant violations "ERR-90001": { Code: "ERR-90001", Category: "invariant", Severity: SeverityCritical, Message: "An unexpected error occurred. Please try again or contact support. Reference: ERR-90001.", Description: "Condition assumed impossible was triggered", AutoResolve: false, CreateTicket: true, }, } // --------------------------------------------------------------------------- // Incident Tracking - Centralized Error Management // --------------------------------------------------------------------------- // CentralEvent is sent to clavitor.ai for every ticket-creating event. // No deduplication - "let it rain". Central handles aggregation if needed. type CentralEvent struct { EventID string `json:"event_id"` // UUID generated locally Code string `json:"code"` // ERR-50001, etc. 
Category string `json:"category"` // database, network, etc. Severity string `json:"severity"` Resource string `json:"resource"` // uk1, db-primary, etc. POP string `json:"pop"` // Which POP reported this Operation string `json:"operation"` // GetEntry, agent unlock, etc. ErrorDetail string `json:"error_detail"` // The actual error message Actor string `json:"actor"` // web, agent:abc123 Timestamp int64 `json:"timestamp"` UserMessage string `json:"user_message"` // Shown to users } // CentralClient posts events to central (clavitor.ai). // Only created in commercial edition; community edition logs locally only. type CentralClient struct { centralURL string popID string apiKey string } var ( globalCentralClient *CentralClient onceCentral sync.Once ) // InitCentralClient creates the global central reporter. // Called at startup from main.go. func InitCentralClient(centralURL, popID, apiKey string) { onceCentral.Do(func() { globalCentralClient = &CentralClient{ centralURL: centralURL, popID: popID, apiKey: apiKey, } }) } // postEvent sends an event to central asynchronously. // Every event is sent individually - no local deduplication. 
func (c *CentralClient) postEvent(ev CentralEvent) { if c == nil { return // Community edition - no central reporting } payload, _ := json.Marshal(ev) req, err := http.NewRequest("POST", c.centralURL+"/v1/events", bytes.NewReader(payload)) if err != nil { log.Printf("[ERROR] Failed to create event request: %v", err) return } req.Header.Set("Authorization", "Bearer "+c.apiKey) req.Header.Set("Content-Type", "application/json") client := &http.Client{Timeout: 10 * time.Second} resp, err := client.Do(req) if err != nil { log.Printf("[ERROR] Failed to post event to central: %v", err) return } defer resp.Body.Close() if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusCreated { log.Printf("[ERROR] Central rejected event: %d", resp.StatusCode) } } // --------------------------------------------------------------------------- // Event Logging - The main interface for handlers // --------------------------------------------------------------------------- // EventContext holds all the searchable fields for an event. type EventContext struct { Code string // ERR-50001, WRN-10001, etc. Category string // auth, database, network, security Resource string // Which resource: uk1, db-primary, vault-file-123 Operation string // What was being attempted: "GetEntry", "agent unlock" Actor string // web, agent:abc123, extension ErrorDetail string // The actual error: "connection refused", "disk full" IPAddr string // Client IP Severity Severity } // LogEvent is the main entry point for all events (errors and warnings). // This replaces direct AuditLog calls for error cases. 
func LogEvent(db *DB, ctx context.Context, ec EventContext) { def, ok := EventRegistry[ec.Code] if !ok { log.Printf("[ERROR] Unknown event code: %s", ec.Code) def = EventRegistry["ERR-90001"] ec.Code = "ERR-90001" ec.Severity = SeverityCritical } // Log to local audit (searchable fields) // Title contains structured data: resource, error, operation // Searchable: grep "resource=uk1" audit.log auditTitle := fmt.Sprintf("op=%s | resource=%s | error=%s | %s", ec.Operation, ec.Resource, ec.ErrorDetail, def.Description) AuditLog(db, &AuditEvent{ Action: ec.Code, Actor: ec.Actor, Title: auditTitle, IPAddr: ec.IPAddr, }) // Log to operator logs (human readable) log.Printf("[%s] %s | resource=%s | op=%s | actor=%s | error=%s | severity=%s", ec.Code, def.Description, ec.Resource, ec.Operation, ec.Actor, ec.ErrorDetail, def.Severity.String(), ) // Post to central if ticket-creating event // "Let it rain" - every event is sent individually, no deduplication if def.CreateTicket && globalCentralClient != nil { go globalCentralClient.postEvent(CentralEvent{ EventID: generateEventID(), Code: ec.Code, Category: def.Category, Severity: def.Severity.String(), Resource: ec.Resource, POP: globalCentralClient.popID, Operation: ec.Operation, ErrorDetail: ec.ErrorDetail, Actor: ec.Actor, Timestamp: time.Now().Unix(), UserMessage: def.Message, }) } } // generateEventID creates a simple UUID-like identifier. func generateEventID() string { return fmt.Sprintf("EVT-%d-%d", time.Now().Unix(), time.Now().UnixNano()%1000) } // LogWarning for non-critical anomalies. func LogWarning(db *DB, ctx context.Context, code, resource, operation, detail, actor, ip string) { LogEvent(db, ctx, EventContext{ Code: code, Category: "performance", // default, override via registry Resource: resource, Operation: operation, Actor: actor, ErrorDetail: detail, IPAddr: ip, Severity: SeverityWarning, }) } // LogError for user-impacting issues. 
func LogError(db *DB, ctx context.Context, code, resource, operation, detail, actor, ip string) { LogEvent(db, ctx, EventContext{ Code: code, Category: "system", // default, override via registry Resource: resource, Operation: operation, Actor: actor, ErrorDetail: detail, IPAddr: ip, Severity: SeverityError, }) } // --------------------------------------------------------------------------- // HTTP Response Helpers // --------------------------------------------------------------------------- // ErrorResponse sends a user-facing error with code. // Use this for API responses to clients. func ErrorResponse(w http.ResponseWriter, status int, code string) { def, ok := EventRegistry[code] if !ok { def = EventDef{ Code: code, Message: "An error occurred. Please try again.", } } w.Header().Set("Content-Type", "application/json") w.WriteHeader(status) response := map[string]string{ "error": code, "message": def.Message, } // For critical/system errors, add incident reference if available if def.Severity >= SeverityError && def.CreateTicket { response["reference"] = code // User can reference this when contacting support response["status"] = "Our team has been alerted" } json.NewEncoder(w).Encode(response) } // HandleError is the full-flow helper for handlers. // Logs event, posts to central if needed, and returns user response. func HandleError( w http.ResponseWriter, r *http.Request, db *DB, code string, resource string, operation string, internalErr error, httpStatus int, ) { ctx := r.Context() // Build actor from context (set by middleware) actor := "unknown" if a, ok := ctx.Value("actor").(string); ok { actor = a } // Log the full event LogEvent(db, ctx, EventContext{ Code: code, Resource: resource, Operation: operation, Actor: actor, ErrorDetail: internalErr.Error(), IPAddr: r.RemoteAddr, }) // Return user-facing response ErrorResponse(w, httpStatus, code) } // LookupEvent returns the event definition for documentation. 
func LookupEvent(code string) (EventDef, bool) { def, ok := EventRegistry[code] return def, ok } // ListEventsByCategory returns all events in a category. func ListEventsByCategory(category string) []EventDef { var results []EventDef for _, def := range EventRegistry { if def.Category == category { results = append(results, def) } } return results } // --------------------------------------------------------------------------- // Status Page Integration (for central/clavitor.ai) // --------------------------------------------------------------------------- // StatusPageEntry is returned by central's /status endpoint. type StatusPageEntry struct { Component string `json:"component"` // uk1, db-primary, etc. Status string `json:"status"` // operational, degraded, down IncidentID string `json:"incident_id,omitempty"` UpdatedAt int64 `json:"updated_at"` UserMessage string `json:"user_message,omitempty"` } // IsResourceAffected queries central for recent events on a resource. // This queries central rather than local cache (no local deduplication). func IsResourceAffected(resource string) bool { // TODO: Query central /v1/events?resource=uk1&since=5m // For now, returns false - central is source of truth return false } // GetStatusForResource returns current status by querying central. func GetStatusForResource(resource string) (StatusPageEntry, bool) { // TODO: Query central /v1/status?resource=uk1 // For now, assumes operational - central drives status page return StatusPageEntry{ Component: resource, Status: "operational", UpdatedAt: time.Now().Unix(), }, false } // --------------------------------------------------------------------------- // Central Query Helpers (for clavitor.ai implementation) // --------------------------------------------------------------------------- // ActiveEventSummary is what central's dashboard shows. 
// SQL equivalent:
//
//	SELECT code, resource, pop, COUNT(*) as count, MAX(timestamp) as last_seen
//	FROM events
//	WHERE status != 'resolved'
//	GROUP BY code, resource, pop
//	ORDER BY count DESC
//
// This gives you: "uk1 has 12 ERR-50001 in the last hour"
//
// NOTE(review): the query above does not select a value for FirstSeen;
// presumably MIN(timestamp) is intended - confirm against central.
type ActiveEventSummary struct {
	Code      string `json:"code"`       // ERR-50001
	Resource  string `json:"resource"`   // uk1
	POP       string `json:"pop"`        // zrh
	Count     int    `json:"count"`      // How many events
	FirstSeen int64  `json:"first_seen"` // First event timestamp
	LastSeen  int64  `json:"last_seen"`  // Most recent event timestamp
	Status    string `json:"status"`     // investigating, identified, monitoring, resolved
}

// CentralQuery represents the query parameters for the central endpoint.
// Every field is optional and is omitted from the JSON encoding when it holds
// its zero value.
// The central API should support:
//
//	GET /v1/events?status=active&group_by=code,resource,pop
//	GET /v1/events?code=ERR-50001&resource=uk1&since=1h
//	POST /v1/events/bulk-resolve { "code": "ERR-50001", "resource": "uk1" }
type CentralQuery struct {
	Status       string   `json:"status,omitempty"`        // active, resolved, all
	Code         string   `json:"code,omitempty"`          // ERR-50001
	Resource     string   `json:"resource,omitempty"`      // uk1
	POP          string   `json:"pop,omitempty"`           // zrh
	Since        string   `json:"since,omitempty"`         // 1h, 24h, 7d
	GroupBy      []string `json:"group_by,omitempty"`      // code, resource, pop
	Severity     string   `json:"severity,omitempty"`      // error, critical
	CreateTicket bool     `json:"create_ticket,omitempty"` // true = ticket-creating only
}

// BulkResolveRequest marks events as resolved in bulk.
// Use case: uk1 fixed, resolve all ERR-50001 for uk1 at once.
type BulkResolveRequest struct { Code string `json:"code"` // Required Resource string `json:"resource,omitempty"` // Optional: resolve for specific resource POP string `json:"pop,omitempty"` // Optional: resolve for specific POP Since int64 `json:"since,omitempty"` // Optional: resolve events after this time Message string `json:"message"` // Resolution message: "Disk space freed, service restored" ResolvedBy string `json:"resolved_by"` // Who fixed it: "ops-johan" } // --------------------------------------------------------------------------- // Environment-based initialization helper // --------------------------------------------------------------------------- // InitErrorsFromEnv sets up the central client from environment variables. // Call this from main(): // lib.InitErrorsFromEnv() func InitErrorsFromEnv() { centralURL := os.Getenv("CLAVITOR_CENTRAL_URL") popID := os.Getenv("CLAVITOR_POP_ID") apiKey := os.Getenv("CLAVITOR_API_KEY") if centralURL != "" && popID != "" && apiKey != "" { InitCentralClient(centralURL, popID, apiKey) log.Printf("[INIT] Central event reporting enabled: POP=%s -> %s", popID, centralURL) } else { log.Printf("[INIT] Central event reporting disabled (community edition or missing config)") } }