From 28a3513fa1e43e9ec5f5e4287275cf86ba083933 Mon Sep 17 00:00:00 2001
From: Nyk <0xnykcd@googlemail.com>
Date: Thu, 5 Mar 2026 15:06:06 +0700
Subject: [PATCH] feat(docs): add docs knowledge APIs for issue 189

---
Review notes (amended patch):
- docs/search route: a non-numeric `limit` no longer propagates NaN (which made
  slice(0, NaN) return no results); it falls back to the default of 50.
- docs-knowledge: isDocsPathAllowed() rejects `..` segments, since its root
  check is a lexical prefix test that `docs/../x` would otherwise satisfy.
- docs-knowledge: readDocsContent() checks configuration before the allow-list
  so the route's "not configured" 500 branch is reachable, and it resolves the
  same normalized path that it validated.
- The `index` blob ids were carried over and context-line indentation was
  re-derived; use `git apply --ignore-whitespace` if the context does not match.

 playwright.config.ts              |   1 +
 src/app/api/docs/content/route.ts |  40 ++++
 src/app/api/docs/search/route.ts  |  33 ++++
 src/app/api/docs/tree/route.ts    |  22 +++
 src/app/api/memory/route.ts       |  25 +--
 src/lib/docs-knowledge.ts         | 278 ++++++++++++++++++++++++++++++
 tests/docs-knowledge.spec.ts      |  61 +++++++
 7 files changed, 450 insertions(+), 10 deletions(-)
 create mode 100644 src/app/api/docs/content/route.ts
 create mode 100644 src/app/api/docs/search/route.ts
 create mode 100644 src/app/api/docs/tree/route.ts
 create mode 100644 src/lib/docs-knowledge.ts
 create mode 100644 tests/docs-knowledge.spec.ts

diff --git a/playwright.config.ts b/playwright.config.ts
index 8f25959..854a182 100644
--- a/playwright.config.ts
+++ b/playwright.config.ts
@@ -34,6 +34,7 @@ export default defineConfig({
       API_KEY: process.env.API_KEY || 'test-api-key-e2e-12345',
       AUTH_USER: process.env.AUTH_USER || 'testadmin',
       AUTH_PASS: process.env.AUTH_PASS || 'testpass1234!',
+      OPENCLAW_MEMORY_DIR: process.env.OPENCLAW_MEMORY_DIR || '.data/e2e-memory',
     },
   }
 })
diff --git a/src/app/api/docs/content/route.ts b/src/app/api/docs/content/route.ts
new file mode 100644
index 0000000..8fea307
--- /dev/null
+++ b/src/app/api/docs/content/route.ts
@@ -0,0 +1,40 @@
+import { NextRequest, NextResponse } from 'next/server'
+import { requireRole } from '@/lib/auth'
+import { readLimiter } from '@/lib/rate-limit'
+import { logger } from '@/lib/logger'
+import { readDocsContent } from '@/lib/docs-knowledge'
+
+/** GET /api/docs/content?path=... returns one document with its size and mtime (viewer role). */
+export async function GET(request: NextRequest) {
+  const auth = requireRole(request, 'viewer')
+  if ('error' in auth) return NextResponse.json({ error: auth.error }, { status: auth.status })
+
+  const rateCheck = readLimiter(request)
+  if (rateCheck) return rateCheck
+
+  try {
+    const { searchParams } = new URL(request.url)
+    const path = (searchParams.get('path') || '').trim()
+
+    if (!path) {
+      return NextResponse.json({ error: 'Path required' }, { status: 400 })
+    }
+
+    try {
+      const doc = await readDocsContent(path)
+      return NextResponse.json(doc)
+    } catch (error) {
+      const message = (error as Error).message || ''
+      if (message.includes('Path not allowed')) {
+        return NextResponse.json({ error: 'Path not allowed' }, { status: 403 })
+      }
+      if (message.includes('not configured')) {
+        return NextResponse.json({ error: 'Docs directory not configured' }, { status: 500 })
+      }
+      return NextResponse.json({ error: 'File not found' }, { status: 404 })
+    }
+  } catch (error) {
+    logger.error({ err: error }, 'GET /api/docs/content error')
+    return NextResponse.json({ error: 'Failed to load doc content' }, { status: 500 })
+  }
+}
diff --git a/src/app/api/docs/search/route.ts b/src/app/api/docs/search/route.ts
new file mode 100644
index 0000000..9a63ede
--- /dev/null
+++ b/src/app/api/docs/search/route.ts
@@ -0,0 +1,33 @@
+import { NextRequest, NextResponse } from 'next/server'
+import { requireRole } from '@/lib/auth'
+import { readLimiter } from '@/lib/rate-limit'
+import { logger } from '@/lib/logger'
+import { searchDocs } from '@/lib/docs-knowledge'
+
+/** GET /api/docs/search?q=...&limit=... returns matching docs ranked by match count (viewer role). */
+export async function GET(request: NextRequest) {
+  const auth = requireRole(request, 'viewer')
+  if ('error' in auth) return NextResponse.json({ error: auth.error }, { status: auth.status })
+
+  const rateCheck = readLimiter(request)
+  if (rateCheck) return rateCheck
+
+  try {
+    const { searchParams } = new URL(request.url)
+    const query = (searchParams.get('q') || searchParams.get('query') || '').trim()
+    // Fall back to the default when `limit` is missing or non-numeric; a NaN
+    // from parseInt would otherwise reach searchDocs and yield no results.
+    const parsedLimit = Number.parseInt(searchParams.get('limit') ?? '', 10)
+    const limit = Number.isFinite(parsedLimit) ? Math.min(parsedLimit, 200) : 50
+
+    if (!query) {
+      return NextResponse.json({ error: 'Query required' }, { status: 400 })
+    }
+
+    const results = await searchDocs(query, limit)
+    return NextResponse.json({ query, results, count: results.length })
+  } catch (error) {
+    logger.error({ err: error }, 'GET /api/docs/search error')
+    return NextResponse.json({ error: 'Failed to search docs' }, { status: 500 })
+  }
+}
diff --git a/src/app/api/docs/tree/route.ts b/src/app/api/docs/tree/route.ts
new file mode 100644
index 0000000..cf6f5a5
--- /dev/null
+++ b/src/app/api/docs/tree/route.ts
@@ -0,0 +1,22 @@
+import { NextRequest, NextResponse } from 'next/server'
+import { requireRole } from '@/lib/auth'
+import { readLimiter } from '@/lib/rate-limit'
+import { logger } from '@/lib/logger'
+import { getDocsTree, listDocsRoots } from '@/lib/docs-knowledge'
+
+/** GET /api/docs/tree returns the allowed docs roots and their file tree (viewer role). */
+export async function GET(request: NextRequest) {
+  const auth = requireRole(request, 'viewer')
+  if ('error' in auth) return NextResponse.json({ error: auth.error }, { status: auth.status })
+
+  const rateCheck = readLimiter(request)
+  if (rateCheck) return rateCheck
+
+  try {
+    const tree = await getDocsTree()
+    return NextResponse.json({ roots: listDocsRoots(), tree })
+  } catch (error) {
+    logger.error({ err: error }, 'GET /api/docs/tree error')
+    return NextResponse.json({ error: 'Failed to load docs tree' }, { status: 500 })
+  }
+}
diff --git a/src/app/api/memory/route.ts b/src/app/api/memory/route.ts
index a1aa609..a7fddce 100644
--- a/src/app/api/memory/route.ts
+++ b/src/app/api/memory/route.ts
@@ -44,17 +44,22 @@ async function resolveSafeMemoryPath(baseDir: string, relativePath: string): Pro
   const baseReal = await realpath(baseDir)
   const fullPath = resolveWithin(baseDir, relativePath)
 
-  // For non-existent paths, validate containment using the parent directory realpath.
-  // This also blocks symlinked parent segments that escape the base.
-  let parentReal: string
-  try {
-    parentReal = await realpath(dirname(fullPath))
-  } catch (err) {
-    const code = (err as NodeJS.ErrnoException).code
-    if (code === 'ENOENT') {
-      throw new Error('Parent directory not found')
+  // For non-existent targets, validate containment using the nearest existing ancestor.
+  // This allows nested creates (mkdir -p) while still blocking symlink escapes.
+  let current = dirname(fullPath)
+  let parentReal = ''
+  while (!parentReal) {
+    try {
+      parentReal = await realpath(current)
+    } catch (err) {
+      const code = (err as NodeJS.ErrnoException).code
+      if (code !== 'ENOENT') throw err
+      const next = dirname(current)
+      if (next === current) {
+        throw new Error('Parent directory not found')
+      }
+      current = next
     }
-    throw err
   }
   if (!isWithinBase(baseReal, parentReal)) {
     throw new Error('Path escapes base directory (symlink)')
diff --git a/src/lib/docs-knowledge.ts b/src/lib/docs-knowledge.ts
new file mode 100644
index 0000000..15e1747
--- /dev/null
+++ b/src/lib/docs-knowledge.ts
@@ -0,0 +1,278 @@
+import { readdir, readFile, stat, lstat, realpath } from 'fs/promises'
+import { existsSync } from 'fs'
+import { dirname, join, sep } from 'path'
+import { resolveWithin } from '@/lib/paths'
+import { config } from '@/lib/config'
+
+/** Top-level folders under the memory dir that are served as docs roots when present. */
+const DOC_ROOT_CANDIDATES = ['docs', 'knowledge-base', 'knowledge', 'memory']
+
+export interface DocsTreeNode {
+  path: string
+  name: string
+  type: 'file' | 'directory'
+  size?: number
+  modified?: number
+  children?: DocsTreeNode[]
+}
+
+/** Converts backslashes to forward slashes and strips leading slashes. */
+function normalizeRelativePath(value: string): string {
+  return String(value || '').replace(/\\/g, '/').replace(/^\/+/, '')
+}
+
+/** True when `candidate` equals `base` or lies beneath it (separator-aware prefix test). */
+function isWithinBase(base: string, candidate: string): boolean {
+  if (candidate === base) return true
+  return candidate.startsWith(base + sep)
+}
+
+/**
+ * Resolves `relativePath` under `baseDir` via resolveWithin and rejects symlink escapes.
+ *
+ * The parent directory must exist and its realpath must stay inside the base. If the
+ * target exists it must not be a symlink; a missing target (ENOENT) is tolerated here.
+ */
+async function resolveSafePath(baseDir: string, relativePath: string): Promise<string> {
+  const baseReal = await realpath(baseDir)
+  const fullPath = resolveWithin(baseDir, relativePath)
+
+  let parentReal: string
+  try {
+    parentReal = await realpath(dirname(fullPath))
+  } catch (err) {
+    const code = (err as NodeJS.ErrnoException).code
+    if (code === 'ENOENT') throw new Error('Parent directory not found')
+    throw err
+  }
+
+  if (!isWithinBase(baseReal, parentReal)) {
+    throw new Error('Path escapes base directory (symlink)')
+  }
+
+  try {
+    const st = await lstat(fullPath)
+    if (st.isSymbolicLink()) throw new Error('Symbolic links are not allowed')
+    const fileReal = await realpath(fullPath)
+    if (!isWithinBase(baseReal, fileReal)) {
+      throw new Error('Path escapes base directory (symlink)')
+    }
+  } catch (err) {
+    const code = (err as NodeJS.ErrnoException).code
+    if (code !== 'ENOENT') throw err
+  }
+
+  return fullPath
+}
+
+/** Existing well-known roots or, when none exist, the configured memory prefixes found on disk. */
+function allowedRoots(baseDir: string): string[] {
+  const candidateRoots = DOC_ROOT_CANDIDATES.filter((root) => existsSync(join(baseDir, root)))
+  if (candidateRoots.length > 0) return candidateRoots
+
+  const fromConfig = (config.memoryAllowedPrefixes || [])
+    .map((prefix) => normalizeRelativePath(prefix).replace(/\/$/, ''))
+    .filter((prefix) => prefix.length > 0)
+    .filter((prefix) => existsSync(join(baseDir, prefix)))
+
+  return fromConfig
+}
+
+/** Lists the docs roots currently available; empty when the memory dir is unset or missing. */
+export function listDocsRoots(): string[] {
+  const baseDir = config.memoryDir
+  if (!baseDir || !existsSync(baseDir)) return []
+  return allowedRoots(baseDir)
+}
+
+/** Returns true only when `relativePath` names an allowed docs root or something beneath one. */
+export function isDocsPathAllowed(relativePath: string): boolean {
+  const normalized = normalizeRelativePath(relativePath)
+  if (!normalized) return false
+
+  // The prefix test below is purely lexical, so reject parent-directory
+  // segments outright; otherwise `docs/../private.md` would pass it.
+  if (normalized.split('/').includes('..')) return false
+
+  const baseDir = config.memoryDir
+  if (!baseDir || !existsSync(baseDir)) return false
+
+  const roots = allowedRoots(baseDir)
+  if (roots.length === 0) return false
+
+  return roots.some((root) => normalized === root || normalized.startsWith(`${root}/`))
+}
+
+/** Recursively lists `dirPath`, skipping symlinks and unreadable entries; directories sort first. */
+async function buildTreeFrom(dirPath: string, relativeBase: string): Promise<DocsTreeNode[]> {
+  const items = await readdir(dirPath, { withFileTypes: true })
+  const nodes: DocsTreeNode[] = []
+
+  for (const item of items) {
+    if (item.isSymbolicLink()) continue
+    const fullPath = join(dirPath, item.name)
+    const relativePath = normalizeRelativePath(join(relativeBase, item.name))
+
+    try {
+      const info = await stat(fullPath)
+      if (item.isDirectory()) {
+        const children = await buildTreeFrom(fullPath, relativePath)
+        nodes.push({
+          path: relativePath,
+          name: item.name,
+          type: 'directory',
+          modified: info.mtime.getTime(),
+          children,
+        })
+      } else if (item.isFile()) {
+        nodes.push({
+          path: relativePath,
+          name: item.name,
+          type: 'file',
+          size: info.size,
+          modified: info.mtime.getTime(),
+        })
+      }
+    } catch {
+      // Ignore unreadable files
+    }
+  }
+
+  return nodes.sort((a, b) => {
+    if (a.type !== b.type) return a.type === 'directory' ? -1 : 1
+    return a.name.localeCompare(b.name)
+  })
+}
+
+/** Builds the tree for every allowed root that is a readable directory. */
+export async function getDocsTree(): Promise<DocsTreeNode[]> {
+  const baseDir = config.memoryDir
+  if (!baseDir || !existsSync(baseDir)) return []
+
+  const roots = allowedRoots(baseDir)
+  const tree: DocsTreeNode[] = []
+
+  for (const root of roots) {
+    const rootPath = join(baseDir, root)
+    try {
+      const info = await stat(rootPath)
+      if (!info.isDirectory()) continue
+      tree.push({
+        path: root,
+        name: root,
+        type: 'directory',
+        modified: info.mtime.getTime(),
+        children: await buildTreeFrom(rootPath, root),
+      })
+    } catch {
+      // Ignore unreadable roots
+    }
+  }
+
+  return tree
+}
+
+/**
+ * Reads a UTF-8 document from beneath an allowed docs root.
+ *
+ * @throws Error('Docs directory not configured') when the memory dir is unset or missing.
+ * @throws Error('Path not allowed') when the path is outside the allowed roots.
+ */
+export async function readDocsContent(relativePath: string): Promise<{ content: string; size: number; modified: number; path: string }> {
+  // Check configuration first: isDocsPathAllowed() also returns false when the
+  // docs directory is missing, which would otherwise mask this error as a 403.
+  const baseDir = config.memoryDir
+  if (!baseDir || !existsSync(baseDir)) {
+    throw new Error('Docs directory not configured')
+  }
+
+  // Validate and resolve the same normalized string so the allow-list check
+  // cannot be sidestepped by a path that normalizes differently than it resolves.
+  const normalized = normalizeRelativePath(relativePath)
+  if (!isDocsPathAllowed(normalized)) {
+    throw new Error('Path not allowed')
+  }
+
+  const safePath = await resolveSafePath(baseDir, normalized)
+  const content = await readFile(safePath, 'utf-8')
+  const info = await stat(safePath)
+
+  return {
+    content,
+    size: info.size,
+    modified: info.mtime.getTime(),
+    path: normalized,
+  }
+}
+
+/** Only `.md` and `.txt` files are searched. */
+function isSearchable(name: string): boolean {
+  return name.endsWith('.md') || name.endsWith('.txt')
+}
+
+/**
+ * Case-insensitive substring search over searchable files under the allowed roots.
+ * Files over 1,000,000 bytes are skipped; hits are ordered by match count, descending.
+ */
+export async function searchDocs(query: string, limit = 100): Promise<Array<{ path: string; name: string; matches: number }>> {
+  const baseDir = config.memoryDir
+  if (!baseDir || !existsSync(baseDir)) return []
+
+  const roots = allowedRoots(baseDir)
+  if (roots.length === 0) return []
+
+  const q = query.trim().toLowerCase()
+  if (!q) return []
+
+  const results: Array<{ path: string; name: string; matches: number }> = []
+
+  const searchFile = async (fullPath: string, relativePath: string) => {
+    try {
+      const info = await stat(fullPath)
+      if (info.size > 1_000_000) return
+      const content = (await readFile(fullPath, 'utf-8')).toLowerCase()
+      let count = 0
+      let idx = content.indexOf(q)
+      while (idx !== -1) {
+        count += 1
+        idx = content.indexOf(q, idx + q.length)
+      }
+      if (count > 0) {
+        results.push({
+          path: normalizeRelativePath(relativePath),
+          name: relativePath.split('/').pop() || relativePath,
+          matches: count,
+        })
+      }
+    } catch {
+      // Ignore unreadable files
+    }
+  }
+
+  const searchDir = async (fullDir: string, relativeDir: string) => {
+    const items = await readdir(fullDir, { withFileTypes: true })
+    for (const item of items) {
+      if (item.isSymbolicLink()) continue
+      const itemFull = join(fullDir, item.name)
+      const itemRel = normalizeRelativePath(join(relativeDir, item.name))
+      if (item.isDirectory()) {
+        await searchDir(itemFull, itemRel)
+      } else if (item.isFile() && isSearchable(item.name.toLowerCase())) {
+        await searchFile(itemFull, itemRel)
+      }
+    }
+  }
+
+  for (const root of roots) {
+    const rootPath = join(baseDir, root)
+    try {
+      await searchDir(rootPath, root)
+    } catch {
+      // Ignore unreadable roots
+    }
+  }
+
+  // Guard against a NaN limit: slice(0, NaN) would silently return [].
+  const cap = Number.isNaN(limit) ? 100 : Math.max(1, Math.min(limit, 200))
+  return results.sort((a, b) => b.matches - a.matches).slice(0, cap)
+}
diff --git a/tests/docs-knowledge.spec.ts b/tests/docs-knowledge.spec.ts
new file mode 100644
index 0000000..a5a9b22
--- /dev/null
+++ b/tests/docs-knowledge.spec.ts
@@ -0,0 +1,61 @@
+import { test, expect } from '@playwright/test'
+import { API_KEY_HEADER } from './helpers'
+
+test.describe('Docs Knowledge API', () => {
+  test('tree/search/content flows for markdown knowledge docs', async ({ request }) => {
+    const stamp = Date.now()
+    const path = `knowledge-base/e2e-kb-${stamp}.md`
+    const content = `# E2E Knowledge ${stamp}\n\nDeployment runbook token: kb-search-${stamp}`
+
+    const create = await request.post('/api/memory', {
+      headers: API_KEY_HEADER,
+      data: {
+        action: 'create',
+        path,
+        content,
+      },
+    })
+    expect(create.status()).toBe(200)
+
+    const tree = await request.get('/api/docs/tree', { headers: API_KEY_HEADER })
+    expect(tree.status()).toBe(200)
+    const treeBody = await tree.json()
+    expect(Array.isArray(treeBody.tree)).toBe(true)
+
+    const search = await request.get(`/api/docs/search?q=${encodeURIComponent(`kb-search-${stamp}`)}`, {
+      headers: API_KEY_HEADER,
+    })
+    expect(search.status()).toBe(200)
+    const searchBody = await search.json()
+    const found = searchBody.results.find((r: any) => r.path === path)
+    expect(found).toBeTruthy()
+
+    const doc = await request.get(`/api/docs/content?path=${encodeURIComponent(path)}`, {
+      headers: API_KEY_HEADER,
+    })
+    expect(doc.status()).toBe(200)
+    const docBody = await doc.json()
+    expect(docBody.path).toBe(path)
+    expect(docBody.content).toContain(`kb-search-${stamp}`)
+
+    const cleanup = await request.delete('/api/memory', {
+      headers: API_KEY_HEADER,
+      data: {
+        action: 'delete',
+        path,
+      },
+    })
+    expect(cleanup.status()).toBe(200)
+  })
+
+  test('docs APIs require auth', async ({ request }) => {
+    const tree = await request.get('/api/docs/tree')
+    expect(tree.status()).toBe(401)
+
+    const search = await request.get('/api/docs/search?q=deployment')
+    expect(search.status()).toBe(401)
+
+    const content = await request.get('/api/docs/content?path=knowledge-base/example.md')
+    expect(content.status()).toBe(401)
+  })
+})