|
|
|
|
@ -0,0 +1,581 @@
|
|
|
|
|
//go:build commercial
|
|
|
|
|
|
|
|
|
|
package main
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"bytes"
|
|
|
|
|
"database/sql"
|
|
|
|
|
"encoding/json"
|
|
|
|
|
"fmt"
|
|
|
|
|
"io"
|
|
|
|
|
"log"
|
|
|
|
|
"net/http"
|
|
|
|
|
"net/http/httptest"
|
|
|
|
|
"os"
|
|
|
|
|
"strings"
|
|
|
|
|
"sync"
|
|
|
|
|
"testing"
|
|
|
|
|
"time"
|
|
|
|
|
|
|
|
|
|
_ "github.com/mattn/go-sqlite3"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// logCapture captures log output for verification
|
|
|
|
|
type logCapture struct {
|
|
|
|
|
original *os.File
|
|
|
|
|
reader *os.File
|
|
|
|
|
writer *os.File
|
|
|
|
|
buffer chan string
|
|
|
|
|
done chan bool
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func captureLogs() *logCapture {
|
|
|
|
|
// Save original stderr
|
|
|
|
|
original := os.Stderr
|
|
|
|
|
|
|
|
|
|
// Create pipe to capture log output
|
|
|
|
|
r, w, _ := os.Pipe()
|
|
|
|
|
|
|
|
|
|
// Redirect log output
|
|
|
|
|
os.Stderr = w
|
|
|
|
|
log.SetOutput(w)
|
|
|
|
|
|
|
|
|
|
lc := &logCapture{
|
|
|
|
|
original: original,
|
|
|
|
|
reader: r,
|
|
|
|
|
writer: w,
|
|
|
|
|
buffer: make(chan string, 100),
|
|
|
|
|
done: make(chan bool),
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Start goroutine to read from pipe
|
|
|
|
|
go func() {
|
|
|
|
|
var buf bytes.Buffer
|
|
|
|
|
io.Copy(&buf, r)
|
|
|
|
|
lc.buffer <- buf.String()
|
|
|
|
|
close(lc.buffer)
|
|
|
|
|
lc.done <- true
|
|
|
|
|
}()
|
|
|
|
|
|
|
|
|
|
return lc
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (lc *logCapture) restore() string {
|
|
|
|
|
// Restore original stderr
|
|
|
|
|
os.Stderr = lc.original
|
|
|
|
|
log.SetOutput(lc.original)
|
|
|
|
|
|
|
|
|
|
// Close writer to signal EOF to reader
|
|
|
|
|
lc.writer.Close()
|
|
|
|
|
|
|
|
|
|
// Wait for reader goroutine to finish
|
|
|
|
|
<-lc.done
|
|
|
|
|
|
|
|
|
|
// Return captured output
|
|
|
|
|
return <-lc.buffer
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// TestUpdateSpan_DatabaseErrors verifies ERR-TELEMETRY-010 through ERR-TELEMETRY-014
|
|
|
|
|
func TestUpdateSpan_DatabaseErrors(t *testing.T) {
|
|
|
|
|
tests := []struct {
|
|
|
|
|
name string
|
|
|
|
|
setupDB func(*sql.DB)
|
|
|
|
|
expectedErrors []string
|
|
|
|
|
description string
|
|
|
|
|
}{
|
|
|
|
|
{
|
|
|
|
|
name: "ERR-TELEMETRY-010: Maintenance check fails",
|
|
|
|
|
setupDB: func(testDB *sql.DB) {
|
|
|
|
|
// Drop maintenance table to cause query failure
|
|
|
|
|
testDB.Exec(`DROP TABLE IF EXISTS maintenance`)
|
|
|
|
|
},
|
|
|
|
|
expectedErrors: []string{"ERR-TELEMETRY-010"},
|
|
|
|
|
description: "Should log ERR-TELEMETRY-010 when maintenance check fails",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
name: "ERR-TELEMETRY-011: Uptime span query fails",
|
|
|
|
|
setupDB: func(testDB *sql.DB) {
|
|
|
|
|
// Drop uptime_spans table to cause query failure
|
|
|
|
|
testDB.Exec(`DROP TABLE IF EXISTS uptime_spans`)
|
|
|
|
|
},
|
|
|
|
|
expectedErrors: []string{"ERR-TELEMETRY-011"},
|
|
|
|
|
description: "Should log ERR-TELEMETRY-011 when uptime span query fails",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
name: "ERR-TELEMETRY-012: Span extend fails",
|
|
|
|
|
setupDB: func(testDB *sql.DB) {
|
|
|
|
|
// Insert a span but make the table read-only by corrupting it
|
|
|
|
|
testDB.Exec(`INSERT INTO uptime_spans (node_id, start_at, end_at) VALUES (?, ?, ?)`,
|
|
|
|
|
"test-node-err", time.Now().Unix()-30, time.Now().Unix()-30)
|
|
|
|
|
// Now drop and recreate to cause update to reference non-existent row
|
|
|
|
|
testDB.Exec(`DELETE FROM uptime_spans WHERE node_id = ?`, "test-node-err")
|
|
|
|
|
},
|
|
|
|
|
expectedErrors: []string{}, // May or may not trigger depending on timing
|
|
|
|
|
description: "Database error handling for span extend",
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for _, tt := range tests {
|
|
|
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
|
|
|
// Setup test database
|
|
|
|
|
setupTestDB(t)
|
|
|
|
|
defer cleanupTestDB()
|
|
|
|
|
|
|
|
|
|
// Apply database corruption/setup
|
|
|
|
|
tt.setupDB(db)
|
|
|
|
|
|
|
|
|
|
// Capture logs
|
|
|
|
|
lc := captureLogs()
|
|
|
|
|
|
|
|
|
|
// Call updateSpan which should trigger error logging
|
|
|
|
|
updateSpan("test-node-err", "test-host", "1.0.0", 50.0, 4096, 8192, 50000, 100000, 0.5, 3600)
|
|
|
|
|
|
|
|
|
|
// Restore and get logs
|
|
|
|
|
logs := lc.restore()
|
|
|
|
|
|
|
|
|
|
// Verify expected errors appear in logs
|
|
|
|
|
for _, errCode := range tt.expectedErrors {
|
|
|
|
|
if !strings.Contains(logs, errCode) {
|
|
|
|
|
t.Errorf("%s: expected log to contain %s, but got:\n%s", tt.description, errCode, logs)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// TestUpdateSpan_MaintenanceModeError specifically tests ERR-TELEMETRY-010
|
|
|
|
|
func TestUpdateSpan_MaintenanceModeError(t *testing.T) {
|
|
|
|
|
setupTestDB(t)
|
|
|
|
|
defer cleanupTestDB()
|
|
|
|
|
|
|
|
|
|
// Capture logs
|
|
|
|
|
var logBuffer bytes.Buffer
|
|
|
|
|
originalOutput := log.Writer()
|
|
|
|
|
log.SetOutput(&logBuffer)
|
|
|
|
|
defer log.SetOutput(originalOutput)
|
|
|
|
|
|
|
|
|
|
// Corrupt the database by dropping the maintenance table
|
|
|
|
|
_, err := db.Exec(`DROP TABLE maintenance`)
|
|
|
|
|
if err != nil {
|
|
|
|
|
t.Fatalf("Failed to drop maintenance table: %v", err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Call updateSpan
|
|
|
|
|
updateSpan("test-node-maint", "test-host", "1.0.0", 50.0, 4096, 8192, 50000, 100000, 0.5, 3600)
|
|
|
|
|
|
|
|
|
|
// Verify ERR-TELEMETRY-010 appears in logs
|
|
|
|
|
logs := logBuffer.String()
|
|
|
|
|
if !strings.Contains(logs, "ERR-TELEMETRY-010") {
|
|
|
|
|
t.Errorf("Expected ERR-TELEMETRY-010 in logs when maintenance check fails, got:\n%s", logs)
|
|
|
|
|
}
|
|
|
|
|
if !strings.Contains(logs, "Failed to check maintenance mode") {
|
|
|
|
|
t.Errorf("Expected 'Failed to check maintenance mode' message, got:\n%s", logs)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// TestUpdateSpan_UptimeSpanQueryError specifically tests ERR-TELEMETRY-011
|
|
|
|
|
func TestUpdateSpan_UptimeSpanQueryError(t *testing.T) {
|
|
|
|
|
setupTestDB(t)
|
|
|
|
|
defer cleanupTestDB()
|
|
|
|
|
|
|
|
|
|
// Capture logs
|
|
|
|
|
var logBuffer bytes.Buffer
|
|
|
|
|
originalOutput := log.Writer()
|
|
|
|
|
log.SetOutput(&logBuffer)
|
|
|
|
|
defer log.SetOutput(originalOutput)
|
|
|
|
|
|
|
|
|
|
// Corrupt the database by making it read-only via closing and reopening with bad path
|
|
|
|
|
db.Close()
|
|
|
|
|
var err error
|
|
|
|
|
db, err = sql.Open("sqlite3", ":memory:")
|
|
|
|
|
if err != nil {
|
|
|
|
|
t.Fatalf("Failed to reopen database: %v", err)
|
|
|
|
|
}
|
|
|
|
|
// Don't create tables - queries will fail
|
|
|
|
|
|
|
|
|
|
// Call updateSpan
|
|
|
|
|
updateSpan("test-node-span", "test-host", "1.0.0", 50.0, 4096, 8192, 50000, 100000, 0.5, 3600)
|
|
|
|
|
|
|
|
|
|
// Verify ERR-TELEMETRY-011 appears in logs
|
|
|
|
|
logs := logBuffer.String()
|
|
|
|
|
if !strings.Contains(logs, "ERR-TELEMETRY-011") {
|
|
|
|
|
t.Errorf("Expected ERR-TELEMETRY-011 in logs when uptime span query fails, got:\n%s", logs)
|
|
|
|
|
}
|
|
|
|
|
if !strings.Contains(logs, "Failed to query latest uptime span") {
|
|
|
|
|
t.Errorf("Expected 'Failed to query latest uptime span' message, got:\n%s", logs)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// TestSendKumaPush_DatabaseError tests ERR-TELEMETRY-033
|
|
|
|
|
func TestSendKumaPush_DatabaseError(t *testing.T) {
|
|
|
|
|
// Setup test database then corrupt it
|
|
|
|
|
var err error
|
|
|
|
|
db, err = sql.Open("sqlite3", ":memory:")
|
|
|
|
|
if err != nil {
|
|
|
|
|
t.Fatalf("Failed to open test database: %v", err)
|
|
|
|
|
}
|
|
|
|
|
defer db.Close()
|
|
|
|
|
// Don't create tables - this will cause queries to fail
|
|
|
|
|
|
|
|
|
|
// Capture logs
|
|
|
|
|
var logBuffer bytes.Buffer
|
|
|
|
|
originalOutput := log.Writer()
|
|
|
|
|
log.SetOutput(&logBuffer)
|
|
|
|
|
defer log.SetOutput(originalOutput)
|
|
|
|
|
|
|
|
|
|
// Create a test server that accepts Kuma pushes
|
|
|
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
|
|
|
w.WriteHeader(http.StatusOK)
|
|
|
|
|
}))
|
|
|
|
|
defer server.Close()
|
|
|
|
|
|
|
|
|
|
// Call sendKumaPush
|
|
|
|
|
sendKumaPush(server.URL)
|
|
|
|
|
|
|
|
|
|
// Verify ERR-TELEMETRY-033 appears in logs (failed to query last telemetry)
|
|
|
|
|
logs := logBuffer.String()
|
|
|
|
|
if !strings.Contains(logs, "ERR-TELEMETRY-033") {
|
|
|
|
|
t.Errorf("Expected ERR-TELEMETRY-033 in logs when telemetry timestamp query fails, got:\n%s", logs)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// TestSendKumaPush_NetworkError tests ERR-TELEMETRY-030
|
|
|
|
|
func TestSendKumaPush_NetworkError(t *testing.T) {
|
|
|
|
|
setupTestDB(t)
|
|
|
|
|
defer cleanupTestDB()
|
|
|
|
|
|
|
|
|
|
// Capture logs
|
|
|
|
|
var logBuffer bytes.Buffer
|
|
|
|
|
originalOutput := log.Writer()
|
|
|
|
|
log.SetOutput(&logBuffer)
|
|
|
|
|
defer log.SetOutput(originalOutput)
|
|
|
|
|
|
|
|
|
|
// Use an invalid URL that will cause network error
|
|
|
|
|
invalidURL := "http://localhost:1/invalid" // Port 1 is typically invalid/unused
|
|
|
|
|
|
|
|
|
|
// Call sendKumaPush with invalid URL
|
|
|
|
|
sendKumaPush(invalidURL)
|
|
|
|
|
|
|
|
|
|
// Verify ERR-TELEMETRY-030 appears in logs
|
|
|
|
|
logs := logBuffer.String()
|
|
|
|
|
if !strings.Contains(logs, "ERR-TELEMETRY-030") {
|
|
|
|
|
t.Errorf("Expected ERR-TELEMETRY-030 in logs when Kuma push fails, got:\n%s", logs)
|
|
|
|
|
}
|
|
|
|
|
if !strings.Contains(logs, "Failed to push health status to Kuma") {
|
|
|
|
|
t.Errorf("Expected 'Failed to push health status to Kuma' message, got:\n%s", logs)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// TestSendKumaPush_NonOKStatus tests ERR-TELEMETRY-031
|
|
|
|
|
func TestSendKumaPush_NonOKStatus(t *testing.T) {
|
|
|
|
|
setupTestDB(t)
|
|
|
|
|
defer cleanupTestDB()
|
|
|
|
|
|
|
|
|
|
// Capture logs
|
|
|
|
|
var logBuffer bytes.Buffer
|
|
|
|
|
originalOutput := log.Writer()
|
|
|
|
|
log.SetOutput(&logBuffer)
|
|
|
|
|
defer log.SetOutput(originalOutput)
|
|
|
|
|
|
|
|
|
|
// Create a test server that returns non-OK status
|
|
|
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
|
|
|
w.WriteHeader(http.StatusServiceUnavailable) // 503
|
|
|
|
|
w.Write([]byte("Service Unavailable"))
|
|
|
|
|
}))
|
|
|
|
|
defer server.Close()
|
|
|
|
|
|
|
|
|
|
// Call sendKumaPush
|
|
|
|
|
sendKumaPush(server.URL)
|
|
|
|
|
|
|
|
|
|
// Verify ERR-TELEMETRY-031 appears in logs
|
|
|
|
|
logs := logBuffer.String()
|
|
|
|
|
if !strings.Contains(logs, "ERR-TELEMETRY-031") {
|
|
|
|
|
t.Errorf("Expected ERR-TELEMETRY-031 in logs when Kuma returns non-OK status, got:\n%s", logs)
|
|
|
|
|
}
|
|
|
|
|
if !strings.Contains(logs, "non-OK status 503") {
|
|
|
|
|
t.Errorf("Expected 'non-OK status 503' message, got:\n%s", logs)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// TestSendKumaPush_ResponseBodyCloseError tests response body handling
|
|
|
|
|
func TestSendKumaPush_ResponseBodyCloseError(t *testing.T) {
|
|
|
|
|
setupTestDB(t)
|
|
|
|
|
defer cleanupTestDB()
|
|
|
|
|
|
|
|
|
|
// Capture logs
|
|
|
|
|
var logBuffer bytes.Buffer
|
|
|
|
|
originalOutput := log.Writer()
|
|
|
|
|
log.SetOutput(&logBuffer)
|
|
|
|
|
defer log.SetOutput(originalOutput)
|
|
|
|
|
|
|
|
|
|
// Create a test server that returns OK
|
|
|
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
|
|
|
w.WriteHeader(http.StatusOK)
|
|
|
|
|
}))
|
|
|
|
|
defer server.Close()
|
|
|
|
|
|
|
|
|
|
// Call sendKumaPush
|
|
|
|
|
sendKumaPush(server.URL)
|
|
|
|
|
|
|
|
|
|
// Should complete without ERR-TELEMETRY-032 (successful close doesn't log error)
|
|
|
|
|
logs := logBuffer.String()
|
|
|
|
|
if strings.Contains(logs, "ERR-TELEMETRY-032") {
|
|
|
|
|
t.Errorf("Did not expect ERR-TELEMETRY-032 for successful response body close, got:\n%s", logs)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// TestTarpit_EarlyDisconnect tests that tarpit handles client disconnect gracefully
|
|
|
|
|
func TestTarpit_EarlyDisconnect(t *testing.T) {
|
|
|
|
|
// Create a custom ResponseWriter that simulates disconnect after first write
|
|
|
|
|
disconnectWriter := &disconnectSimulatingWriter{
|
|
|
|
|
headers: make(http.Header),
|
|
|
|
|
failAfter: 1, // Fail after first write
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
req := httptest.NewRequest("GET", "/unknown", nil)
|
|
|
|
|
|
|
|
|
|
// Use a goroutine since tarpit blocks
|
|
|
|
|
done := make(chan bool)
|
|
|
|
|
go func() {
|
|
|
|
|
tarpit(disconnectWriter, req)
|
|
|
|
|
done <- true
|
|
|
|
|
}()
|
|
|
|
|
|
|
|
|
|
// Wait for tarpit to complete (should be quick due to disconnect)
|
|
|
|
|
select {
|
|
|
|
|
case <-done:
|
|
|
|
|
// Expected - tarpit returned early due to disconnect
|
|
|
|
|
case <-time.After(2 * time.Second):
|
|
|
|
|
t.Error("tarpit did not return early on client disconnect")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Verify that at least one write was attempted
|
|
|
|
|
if !disconnectWriter.written {
|
|
|
|
|
t.Error("tarpit should have attempted at least one write")
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// disconnectSimulatingWriter simulates a client that disconnects after N writes
|
|
|
|
|
type disconnectSimulatingWriter struct {
|
|
|
|
|
headers http.Header
|
|
|
|
|
status int
|
|
|
|
|
written bool
|
|
|
|
|
writeCount int
|
|
|
|
|
failAfter int
|
|
|
|
|
flushCount int
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (m *disconnectSimulatingWriter) Header() http.Header {
|
|
|
|
|
return m.headers
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (m *disconnectSimulatingWriter) Write(p []byte) (int, error) {
|
|
|
|
|
m.written = true
|
|
|
|
|
m.writeCount++
|
|
|
|
|
if m.writeCount >= m.failAfter {
|
|
|
|
|
return 0, io.ErrClosedPipe // Simulate client disconnect
|
|
|
|
|
}
|
|
|
|
|
return len(p), nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (m *disconnectSimulatingWriter) WriteHeader(status int) {
|
|
|
|
|
m.status = status
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (m *disconnectSimulatingWriter) Flush() {
|
|
|
|
|
m.flushCount++
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// TestErrorCodes_Unique verifies that all error codes are unique
|
|
|
|
|
func TestErrorCodes_Unique(t *testing.T) {
|
|
|
|
|
// Read the main.go and kuma.go files to extract error codes
|
|
|
|
|
mainContent, err := os.ReadFile("main.go")
|
|
|
|
|
if err != nil {
|
|
|
|
|
t.Fatalf("Failed to read main.go: %v", err)
|
|
|
|
|
}
|
|
|
|
|
kumaContent, err := os.ReadFile("kuma.go")
|
|
|
|
|
if err != nil {
|
|
|
|
|
t.Fatalf("Failed to read kuma.go: %v", err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
content := string(mainContent) + string(kumaContent)
|
|
|
|
|
|
|
|
|
|
// Extract all ERR-TELEMETRY-XXX codes
|
|
|
|
|
codes := make(map[string]int)
|
|
|
|
|
for i := 0; i < len(content)-15; i++ {
|
|
|
|
|
if content[i:i+14] == "ERR-TELEMETRY-" {
|
|
|
|
|
// Find the end of the code (3 digits)
|
|
|
|
|
end := i + 14
|
|
|
|
|
for end < len(content) && content[end] >= '0' && content[end] <= '9' {
|
|
|
|
|
end++
|
|
|
|
|
}
|
|
|
|
|
if end > i+14 {
|
|
|
|
|
code := content[i:end]
|
|
|
|
|
codes[code]++
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Verify each code appears only once (unique)
|
|
|
|
|
for code, count := range codes {
|
|
|
|
|
if count > 1 {
|
|
|
|
|
t.Errorf("Error code %s appears %d times - should be unique", code, count)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Verify expected codes exist
|
|
|
|
|
expectedCodes := []string{
|
|
|
|
|
"ERR-TELEMETRY-001", // Failed to open operations.db
|
|
|
|
|
"ERR-TELEMETRY-002", // Failed to load CA chain
|
|
|
|
|
"ERR-TELEMETRY-003", // Invalid certificate
|
|
|
|
|
"ERR-TELEMETRY-004", // Failed to insert telemetry
|
|
|
|
|
"ERR-TELEMETRY-005", // Failed to create telemetry table
|
|
|
|
|
"ERR-TELEMETRY-006", // Failed to create telemetry index
|
|
|
|
|
"ERR-TELEMETRY-007", // Failed to create telemetry node_latest index
|
|
|
|
|
"ERR-TELEMETRY-008", // Failed to create uptime_spans table
|
|
|
|
|
"ERR-TELEMETRY-009", // Failed to create uptime_spans index
|
|
|
|
|
"ERR-TELEMETRY-010", // Failed to check maintenance mode
|
|
|
|
|
"ERR-TELEMETRY-011", // Failed to query latest uptime span
|
|
|
|
|
"ERR-TELEMETRY-012", // Failed to extend uptime span
|
|
|
|
|
"ERR-TELEMETRY-013", // Failed to extend early-judgment span
|
|
|
|
|
"ERR-TELEMETRY-014", // Failed to insert new uptime span
|
|
|
|
|
"ERR-TELEMETRY-015", // Failed to create maintenance table
|
|
|
|
|
"ERR-TELEMETRY-020", // Failed to create ntfy alert request
|
|
|
|
|
"ERR-TELEMETRY-021", // Failed to send ntfy alert
|
|
|
|
|
"ERR-TELEMETRY-022", // Failed to close ntfy response body
|
|
|
|
|
"ERR-TELEMETRY-030", // Failed to push health to Kuma
|
|
|
|
|
"ERR-TELEMETRY-031", // Kuma returned non-OK status
|
|
|
|
|
"ERR-TELEMETRY-032", // Failed to close Kuma response body (OK status)
|
|
|
|
|
"ERR-TELEMETRY-033", // Failed to query last telemetry timestamp
|
|
|
|
|
"ERR-TELEMETRY-034", // Failed to close Kuma response body (non-OK status)
|
|
|
|
|
"ERR-TELEMETRY-040", // tarpit called without Flusher
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for _, code := range expectedCodes {
|
|
|
|
|
if _, exists := codes[code]; !exists {
|
|
|
|
|
t.Errorf("Expected error code %s not found in source", code)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
t.Logf("Found %d unique error codes", len(codes))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// TestErrorCodes_Format verifies error codes follow the correct format
|
|
|
|
|
func TestErrorCodes_Format(t *testing.T) {
|
|
|
|
|
// Read source files
|
|
|
|
|
mainContent, err := os.ReadFile("main.go")
|
|
|
|
|
if err != nil {
|
|
|
|
|
t.Fatalf("Failed to read main.go: %v", err)
|
|
|
|
|
}
|
|
|
|
|
kumaContent, err := os.ReadFile("kuma.go")
|
|
|
|
|
if err != nil {
|
|
|
|
|
t.Fatalf("Failed to read kuma.go: %v", err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
content := string(mainContent) + string(kumaContent)
|
|
|
|
|
|
|
|
|
|
// Check that error messages follow format: ERR-TELEMETRY-XXX: Actionable message
|
|
|
|
|
lines := strings.Split(content, "\n")
|
|
|
|
|
for i, line := range lines {
|
|
|
|
|
if strings.Contains(line, "ERR-TELEMETRY-") && strings.Contains(line, "log.") {
|
|
|
|
|
// Verify format includes colon after error code
|
|
|
|
|
if !strings.Contains(line, "ERR-TELEMETRY-") || !strings.Contains(line, ":") {
|
|
|
|
|
t.Errorf("Line %d: Error message should follow format 'ERR-TELEMETRY-XXX: message', got: %s", i+1, strings.TrimSpace(line))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// TestHandleTelemetry_DatabaseInsertError tests ERR-TELEMETRY-004
|
|
|
|
|
func TestHandleTelemetry_DatabaseInsertError(t *testing.T) {
|
|
|
|
|
setupTestDB(t)
|
|
|
|
|
defer cleanupTestDB()
|
|
|
|
|
|
|
|
|
|
// Temporarily disable mTLS
|
|
|
|
|
oldCAPool := caPool
|
|
|
|
|
caPool = nil
|
|
|
|
|
defer func() { caPool = oldCAPool }()
|
|
|
|
|
|
|
|
|
|
// Corrupt database by making it read-only
|
|
|
|
|
db.Exec(`PRAGMA query_only = ON`)
|
|
|
|
|
|
|
|
|
|
payload := map[string]interface{}{
|
|
|
|
|
"node_id": "test-node-db-err",
|
|
|
|
|
"version": "1.0.0",
|
|
|
|
|
"hostname": "test-host",
|
|
|
|
|
"uptime_seconds": 3600,
|
|
|
|
|
"cpu_percent": 25.5,
|
|
|
|
|
"memory_total_mb": 8192,
|
|
|
|
|
"memory_used_mb": 4096,
|
|
|
|
|
"disk_total_mb": 100000,
|
|
|
|
|
"disk_used_mb": 50000,
|
|
|
|
|
"load_1m": 0.5,
|
|
|
|
|
"vault_count": 5,
|
|
|
|
|
"vault_size_mb": 10.5,
|
|
|
|
|
"vault_entries": 100,
|
|
|
|
|
"mode": "commercial",
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
body, _ := json.Marshal(payload)
|
|
|
|
|
req := httptest.NewRequest("POST", "/telemetry", bytes.NewReader(body))
|
|
|
|
|
w := httptest.NewRecorder()
|
|
|
|
|
|
|
|
|
|
// Capture logs
|
|
|
|
|
var logBuffer bytes.Buffer
|
|
|
|
|
originalOutput := log.Writer()
|
|
|
|
|
log.SetOutput(&logBuffer)
|
|
|
|
|
defer log.SetOutput(originalOutput)
|
|
|
|
|
|
|
|
|
|
handleTelemetry(w, req)
|
|
|
|
|
|
|
|
|
|
// Restore query_only mode
|
|
|
|
|
db.Exec(`PRAGMA query_only = OFF`)
|
|
|
|
|
|
|
|
|
|
// Should return 500 error
|
|
|
|
|
resp := w.Result()
|
|
|
|
|
if resp.StatusCode != 500 {
|
|
|
|
|
t.Errorf("Expected 500 status for database error, got %d", resp.StatusCode)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Verify ERR-TELEMETRY-004 appears in logs
|
|
|
|
|
logs := logBuffer.String()
|
|
|
|
|
if !strings.Contains(logs, "ERR-TELEMETRY-004") {
|
|
|
|
|
t.Errorf("Expected ERR-TELEMETRY-004 in logs when telemetry insert fails, got:\n%s", logs)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// TestConcurrentErrorHandling verifies error handling is thread-safe
|
|
|
|
|
func TestConcurrentErrorHandling(t *testing.T) {
|
|
|
|
|
setupTestDB(t)
|
|
|
|
|
defer cleanupTestDB()
|
|
|
|
|
|
|
|
|
|
// Capture logs
|
|
|
|
|
var logBuffer bytes.Buffer
|
|
|
|
|
originalOutput := log.Writer()
|
|
|
|
|
log.SetOutput(&logBuffer)
|
|
|
|
|
defer log.SetOutput(originalOutput)
|
|
|
|
|
|
|
|
|
|
// Run multiple goroutines that trigger error handling
|
|
|
|
|
var wg sync.WaitGroup
|
|
|
|
|
for i := 0; i < 10; i++ {
|
|
|
|
|
wg.Add(1)
|
|
|
|
|
go func(id int) {
|
|
|
|
|
defer wg.Done()
|
|
|
|
|
nodeID := fmt.Sprintf("concurrent-node-%d", id)
|
|
|
|
|
updateSpan(nodeID, "test-host", "1.0.0", 50.0, 4096, 8192, 50000, 100000, 0.5, 3600)
|
|
|
|
|
}(i)
|
|
|
|
|
}
|
|
|
|
|
wg.Wait()
|
|
|
|
|
|
|
|
|
|
// Verify no panics occurred and logs were written safely
|
|
|
|
|
logs := logBuffer.String()
|
|
|
|
|
// Should have OUTAGE SPAN logs for each node
|
|
|
|
|
for i := 0; i < 10; i++ {
|
|
|
|
|
expectedNode := fmt.Sprintf("concurrent-node-%d", i)
|
|
|
|
|
if !strings.Contains(logs, expectedNode) {
|
|
|
|
|
t.Errorf("Expected logs to contain node %s", expectedNode)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|