feat(metrics): add task regression trend endpoint

This commit is contained in:
Nyk 2026-03-05 18:49:05 +07:00
parent f68acc65c0
commit 6e3fad558d
2 changed files with 264 additions and 0 deletions

View File

@ -0,0 +1,189 @@
import { NextRequest, NextResponse } from 'next/server'
import { requireRole } from '@/lib/auth'
import { readLimiter } from '@/lib/rate-limit'
import { getDatabase } from '@/lib/db'
import { logger } from '@/lib/logger'
/**
 * Subset of `tasks` columns read by the regression endpoint.
 *
 * `created_at` and `completed_at` are compared against unix-second window
 * bounds and subtracted from each other to produce latency in seconds.
 */
interface RegressionTaskRow {
  /** Task primary key. */
  id: number
  /** Creation time; the start of the latency measurement. */
  created_at: number
  /** Completion time; rows without one are skipped when building stats. */
  completed_at: number | null
  /** Retry counter; any value above zero marks the task as intervened. */
  retry_count: number | null
  /** Outcome label; compared case-insensitively against failed/partial/abandoned. */
  outcome: string | null
  /** Error text; any non-blank value marks the task as intervened. */
  error_message: string | null
}
/**
 * Aggregated metrics for one comparison window (before or after the cutover).
 */
interface WindowStats {
  /** Which side of the cutover this window describes. */
  label: 'baseline' | 'post'
  /** Window start in unix seconds (inclusive). */
  start: number
  /** Window end in unix seconds (exclusive). */
  end: number
  /** Number of latency samples collected for the window. */
  sample_size: number
  /** Latency (completed_at - created_at) statistics; each is null when there are no samples. */
  latency_seconds: {
    p50: number | null
    p95: number | null
    avg: number | null
  }
  /** Tasks flagged as intervened, and that count divided by `sample_size` (0 when empty). */
  interventions: {
    count: number
    rate: number
  }
}
/**
 * Parses a query-string timestamp into whole unix seconds.
 *
 * Two input forms are accepted:
 *  - a number, interpreted as unix seconds (any fractional part is dropped);
 *  - any other string that `Date.parse` understands, e.g. an ISO 8601 timestamp.
 *
 * @param value Raw parameter value, or null when the parameter is absent.
 * @returns A positive unix-seconds value, or null when the input is empty,
 *   unparseable, or does not denote an instant after the epoch.
 */
function parseTimestamp(value: string | null): number | null {
  if (!value) return null
  const trimmed = value.trim()
  if (!trimmed) return null

  const numeric = Number(trimmed)
  if (!Number.isNaN(numeric)) {
    // Numeric input is decided here and never handed to Date.parse: the
    // non-ISO fallback of Date.parse is implementation-defined and can turn
    // strings such as "0" or "-5" into an unrelated calendar date.
    if (!Number.isFinite(numeric)) return null
    const seconds = Math.floor(numeric)
    return seconds > 0 ? seconds : null
  }

  const parsedMs = Date.parse(trimmed)
  if (Number.isNaN(parsedMs)) return null

  // Apply the same "positive seconds only" rule as the numeric branch so
  // callers never receive zero or a pre-epoch (negative) timestamp.
  const seconds = Math.floor(parsedMs / 1000)
  return seconds > 0 ? seconds : null
}
/**
 * Nearest-rank percentile of a list of numbers.
 *
 * The input is copied before sorting, so the caller's array is left untouched.
 * The computed rank is clamped to the valid index range, so percentiles at or
 * below 0 yield the minimum and those at or above 100 yield the maximum.
 *
 * @param values Samples in any order.
 * @param percentile Percentile on a 0-100 scale.
 * @returns The selected sample, or null when `values` is empty.
 */
function percentileNearestRank(values: number[], percentile: number): number | null {
  const count = values.length
  if (count === 0) return null

  const ascending = values.slice().sort((left, right) => left - right)
  const rank = Math.ceil((percentile / 100) * count)
  const lastIndex = count - 1
  const index = Math.min(lastIndex, Math.max(0, rank - 1))
  return ascending[index]
}
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param values Samples to average.
 * @returns The mean, or null when `values` is empty.
 */
function average(values: number[]): number | null {
  if (values.length === 0) return null

  let total = 0
  for (const value of values) {
    total += value
  }
  return total / values.length
}
/**
 * Decides whether a task counts as "intervened".
 *
 * A task is intervened when any of the following holds:
 *  - its retry count is greater than zero;
 *  - its error message contains non-whitespace text;
 *  - its outcome (case-insensitive) is `failed`, `partial` or `abandoned`.
 *
 * @param row Task row to classify; null fields are treated as 0 / empty.
 * @returns true when the task matches at least one of the conditions.
 */
function isTaskIntervened(row: RegressionTaskRow): boolean {
  if (Number(row.retry_count || 0) > 0) return true
  if (String(row.error_message || '').trim().length > 0) return true

  switch (String(row.outcome || '').toLowerCase()) {
    case 'failed':
    case 'partial':
    case 'abandoned':
      return true
    default:
      return false
  }
}
/**
 * Aggregates latency and intervention metrics for the tasks completed inside
 * one time window.
 *
 * A task belongs to the sample when its `completed_at` lies in `[start, end)`
 * and is not earlier than its `created_at`. Latency statistics and the
 * intervention count are both computed over that same sample, so
 * `interventions.rate` always lies between 0 and 1 and `interventions.count`
 * can never be positive while `sample_size` is 0.
 *
 * @param label Which side of the cutover the window describes.
 * @param start Window start in unix seconds (inclusive).
 * @param end Window end in unix seconds (exclusive).
 * @param tasks Candidate rows; rows outside the window are ignored, so the
 *   same list can be passed for several windows.
 * @returns Sample size, p50/p95/average latency in seconds (null when the
 *   sample is empty) and the intervention count and rate (rate 0 when empty).
 */
function buildWindowStats(
  label: 'baseline' | 'post',
  start: number,
  end: number,
  tasks: RegressionTaskRow[],
): WindowStats {
  const latencySamples: number[] = []
  let interventionCount = 0

  for (const task of tasks) {
    if (!task.completed_at) continue
    if (task.completed_at < start || task.completed_at >= end) continue
    // Rows whose completion precedes creation (or cannot be compared to it)
    // have no meaningful latency. They are left out of the sample entirely so
    // the intervention count shares its population with `sample_size`.
    if (!(task.completed_at >= task.created_at)) continue

    latencySamples.push(task.completed_at - task.created_at)
    if (isTaskIntervened(task)) {
      interventionCount += 1
    }
  }

  const sampleSize = latencySamples.length
  return {
    label,
    start,
    end,
    sample_size: sampleSize,
    latency_seconds: {
      p50: percentileNearestRank(latencySamples, 50),
      p95: percentileNearestRank(latencySamples, 95),
      avg: average(latencySamples),
    },
    interventions: {
      count: interventionCount,
      // Guard the division so an empty window reports 0 rather than NaN.
      rate: sampleSize > 0 ? interventionCount / sampleSize : 0,
    },
  }
}
/**
 * Query for finished tasks of one workspace whose completion time falls in
 * `[?, ?)`. Bound parameters, in order: workspace id, lower bound (inclusive),
 * upper bound (exclusive).
 */
const REGRESSION_TASKS_SQL = `
SELECT
id,
created_at,
completed_at,
retry_count,
outcome,
error_message
FROM tasks
WHERE workspace_id = ?
AND status = 'done'
AND completed_at IS NOT NULL
AND completed_at >= ?
AND completed_at < ?
`

/**
 * GET handler: compares task latency and intervention rate before and after a
 * cutover instant.
 *
 * Query parameters:
 *  - `beta_start` (alias `cutover`): required cutover time, unix seconds or a
 *    date string; must not be in the future.
 *  - `lookback_seconds`: optional baseline length; defaults to 7 days and is
 *    clamped to the range 60 seconds .. 30 days.
 *
 * The post window runs from the cutover to the current second (inclusive).
 * The baseline window ends at the cutover and is as long as the post window,
 * capped by the lookback.
 *
 * Responses: the auth failure reported by `requireRole`, whatever
 * `readLimiter` returns when it yields a value, 400 for a missing/invalid or
 * future `beta_start`, 500 when anything inside the computation throws, and
 * otherwise a JSON body with metric definitions, params, both windows and
 * their deltas.
 */
export async function GET(request: NextRequest) {
  const auth = requireRole(request, 'viewer')
  if ('error' in auth) {
    return NextResponse.json({ error: auth.error }, { status: auth.status })
  }

  const limited = readLimiter(request)
  if (limited) return limited

  try {
    const workspaceId = auth.user.workspace_id ?? 1
    const nowSeconds = Math.floor(Date.now() / 1000)
    const query = new URL(request.url).searchParams

    const betaStart = parseTimestamp(query.get('beta_start') || query.get('cutover'))
    if (!betaStart) {
      return NextResponse.json({ error: 'beta_start query parameter is required (unix seconds or ISO timestamp)' }, { status: 400 })
    }
    if (betaStart > nowSeconds) {
      return NextResponse.json({ error: 'beta_start must not be in the future' }, { status: 400 })
    }

    // Lookback: default 7 days, never below 60 seconds or above 30 days.
    const secondsPerDay = 24 * 60 * 60
    const defaultLookback = 7 * secondsPerDay
    const lookbackCeiling = 30 * secondsPerDay
    const requestedLookback = Number(query.get('lookback_seconds') || defaultLookback)
    const usableLookback = Number.isFinite(requestedLookback) ? requestedLookback : defaultLookback
    const lookbackSeconds = Math.min(lookbackCeiling, Math.max(60, Math.floor(usableLookback)))

    // The end bound is exclusive, so add one second to include tasks
    // completed during the current second.
    const postEnd = nowSeconds + 1
    const postSpan = Math.max(60, postEnd - betaStart)
    const baselineSpan = Math.min(lookbackSeconds, postSpan)
    const baselineStart = Math.max(0, betaStart - baselineSpan)

    // One query covers both windows; each window filters the rows itself.
    const rows = getDatabase()
      .prepare(REGRESSION_TASKS_SQL)
      .all(workspaceId, baselineStart, postEnd) as RegressionTaskRow[]

    const baseline = buildWindowStats('baseline', baselineStart, betaStart, rows)
    const post = buildWindowStats('post', betaStart, postEnd, rows)

    const baselineP95 = baseline.latency_seconds.p95
    const postP95 = post.latency_seconds.p95
    const p95Delta = postP95 !== null && baselineP95 !== null ? postP95 - baselineP95 : null
    const interventionRateDelta = post.interventions.rate - baseline.interventions.rate

    return NextResponse.json({
      metric_definitions: {
        p95_task_latency_seconds: '95th percentile of (completed_at - created_at) for done tasks in the window',
        intervention_rate: 'intervened_task_count / sample_size where intervened = retry_count>0 OR outcome in {failed,partial,abandoned} OR error_message not empty',
      },
      params: {
        beta_start: betaStart,
        lookback_seconds: lookbackSeconds,
      },
      windows: {
        baseline,
        post,
      },
      deltas: {
        p95_latency_seconds: p95Delta,
        intervention_rate: interventionRateDelta,
      },
    })
  } catch (error) {
    logger.error({ err: error }, 'GET /api/tasks/regression error')
    return NextResponse.json({ error: 'Failed to compute regression metrics' }, { status: 500 })
  }
}

View File

@ -0,0 +1,75 @@
import { expect, test } from '@playwright/test'
import { API_KEY_HEADER, createTestTask, deleteTestTask } from './helpers'
// End-to-end checks for GET /api/tasks/regression.
// Statement order matters in the first test: baseline tasks are created, then
// the test waits, then the cutover is captured, then post tasks are created.
test.describe('Task Regression Metrics API', () => {
// Ids of tasks created by the current test; deleted after each test.
const cleanup: number[] = []
test.afterEach(async ({ request }) => {
for (const id of cleanup) {
// Best-effort cleanup: a failed delete must not fail the test run.
await deleteTestTask(request, id).catch(() => {})
}
// Reset in place so the next test starts with an empty list.
cleanup.length = 0
})
test('returns baseline vs post p95 latency and intervention trend', async ({ request }) => {
// Baseline window: one clean task and one intervened task.
const baselineTaskA = await createTestTask(request, {
status: 'done',
retry_count: 0,
outcome: 'success',
})
expect(baselineTaskA.res.status()).toBe(201)
cleanup.push(baselineTaskA.id)
const baselineTaskB = await createTestTask(request, {
status: 'done',
retry_count: 1,
outcome: 'partial',
error_message: 'Needs operator check',
})
expect(baselineTaskB.res.status()).toBe(201)
cleanup.push(baselineTaskB.id)
// Wait a little over one second before taking the cutover.
// NOTE(review): presumably this puts the baseline tasks' completion time in
// an earlier whole second than betaStart — confirm how completed_at is set.
await new Promise((resolve) => setTimeout(resolve, 1200))
// Cutover instant in whole unix seconds, passed as beta_start below.
const betaStart = Math.floor(Date.now() / 1000)
// Post window: both tasks are intervened, so the post rate should be higher.
const postTaskA = await createTestTask(request, {
status: 'done',
retry_count: 2,
outcome: 'failed',
error_message: 'Escalated',
})
expect(postTaskA.res.status()).toBe(201)
cleanup.push(postTaskA.id)
const postTaskB = await createTestTask(request, {
status: 'done',
retry_count: 1,
outcome: 'abandoned',
error_message: 'Manual rollback',
})
expect(postTaskB.res.status()).toBe(201)
cleanup.push(postTaskB.id)
const res = await request.get(`/api/tasks/regression?beta_start=${betaStart}&lookback_seconds=3600`, {
headers: API_KEY_HEADER,
})
// Read the body as text first so it can serve as the failure message.
const responseText = await res.text()
expect(res.status(), responseText).toBe(200)
const body = JSON.parse(responseText)
expect(body.metric_definitions).toBeTruthy()
// Both windows must contain samples and report a numeric p95.
expect(body.windows?.baseline?.sample_size).toBeGreaterThan(0)
expect(body.windows?.post?.sample_size).toBeGreaterThan(0)
expect(typeof body.windows?.baseline?.latency_seconds?.p95).toBe('number')
expect(typeof body.windows?.post?.latency_seconds?.p95).toBe('number')
// The intervention rate must rise from baseline to post.
expect(body.windows?.post?.interventions?.rate).toBeGreaterThan(body.windows?.baseline?.interventions?.rate)
})
test('requires beta_start and auth', async ({ request }) => {
// No API key header: the request is expected to be rejected with 401.
const unauth = await request.get('/api/tasks/regression?beta_start=1700000000')
expect(unauth.status()).toBe(401)
// With the API key header but no beta_start: expected 400.
const missing = await request.get('/api/tasks/regression', { headers: API_KEY_HEADER })
expect(missing.status()).toBe(400)
})
})