Compare commits

...

10 Commits

Author SHA1 Message Date
James a1d2546117 Search: sort by timestamp desc, display dates in results
- FTS search results now ORDER BY processed_at DESC (was rank)
- Full search page shows processed timestamp with formatDateTime
- Quick search dropdown shows formatted date next to category
- Fixed formatDate to handle timezone-aware timestamps
2026-03-31 13:34:54 -04:00
James 31d6cb6f86 fix: skip store copy if file already exists (reprocess safety)
When reprocessing a document whose PDF is already in the store,
copyFile() would fail with 'open /srv/docsys/inbox/...: no such file
or directory' because the upload wrote to a temp inbox path that was
already cleaned up by the time async OCR completed.

The store is keyed by content hash so if the file is already there,
the copy is a no-op — skip it rather than error out.
2026-03-23 14:27:38 -04:00
James 883f118d66 fix: pdftoppm output filename glob instead of hardcoded page-1.png
pdftoppm zero-pads the page number based on total page count:
- <10 pages: page-1.png
- <100 pages: page-01.png
- <1000 pages: page-001.png

The code hardcoded 'page-1.png' and 'page-N.png', which fails for any
multi-page document. Use filepath.Glob('page-*.png') to find the actual
output regardless of padding width.

Fixed in both ConvertToImage() (first-page preview) and the multi-page
OCR loop in ProcessDocument().
2026-03-23 14:14:28 -04:00
James 9622ab9390 fix: format=md endpoint now returns full OCR text (full_text field)
SearchDocuments excludes full_text for performance. The MD endpoint
needs the actual OCR content, not just the summary.

Added SearchDocumentsWithFullText() and SearchDocumentsWithFullTextFallback()
that select full_text explicitly. apiSearchMDHandler now uses these,
so format=md returns the complete OCR/markdown text for each document.
2026-03-23 14:07:20 -04:00
James 405a6f697f feat: add GET /api/search?q=...&format=md for AI/LLM consumption
New endpoint returns all matching documents as concatenated plain-text
markdown, one section per document separated by ---.

Format:
  # Document: {title}
  ID: {id} | Category: {category} | Date: {date} | Vendor: {vendor}

  {full_text or summary}

  ---

Parameters:
  q      - search query (required)
  format - must be 'md' (required; distinguishes from HTML search)

Uses same FTS5 search as existing endpoints, limit raised to 200.
Falls back to LIKE search if FTS5 fails. Returns text/markdown content type.
POST /api/search (HTML partial) unchanged.
2026-03-23 13:58:47 -04:00
James 63d4e5e5ca chore: auto-commit uncommitted changes 2026-02-28 06:01:28 -05:00
James 2c91d5649e chore: auto-commit uncommitted changes 2026-02-28 00:01:21 -05:00
James bbc029196a chore: auto-commit uncommitted changes 2026-02-25 18:01:27 -05:00
James 83373885d4 Add vocabulary hints for handwriting: Jongsma, Johan, Tatyana, St. Petersburg FL 2026-02-25 14:24:04 -05:00
James 1b4c82ab83 Improve title prompt: require specific, identifying titles with sender+topic+date 2026-02-25 14:21:43 -05:00
9 changed files with 1504 additions and 47 deletions

290
ai.go
View File

@ -1,6 +1,7 @@
package main
import (
"archive/zip"
"bytes"
"crypto/sha256"
"encoding/base64"
@ -89,7 +90,7 @@ func ConvertToImage(filePath string) ([]byte, error) {
}
defer os.RemoveAll(tmpDir)
cmd := exec.Command("libreoffice", "--headless", "--convert-to", "pdf", "--outdir", tmpDir, filePath)
cmd := exec.Command("soffice", "--headless", "--convert-to", "pdf", "--outdir", tmpDir, filePath)
if err := cmd.Run(); err != nil {
return nil, fmt.Errorf("libreoffice conversion failed: %w", err)
}
@ -115,14 +116,21 @@ func ConvertToImage(filePath string) ([]byte, error) {
return nil, fmt.Errorf("pdftoppm failed: %w", err)
}
pngPath := filepath.Join(tmpDir, "page-1.png")
return os.ReadFile(pngPath)
// pdftoppm uses variable-width zero-padding depending on page count
// (e.g. page-01.png for <100 pages, page-001.png for <1000 pages).
// Glob for the first match instead of hardcoding "page-1.png".
matches, err := filepath.Glob(filepath.Join(tmpDir, "page-*.png"))
if err != nil || len(matches) == 0 {
return nil, fmt.Errorf("pdftoppm output not found in %s", tmpDir)
}
return os.ReadFile(matches[0])
}
// Image files — read directly
return os.ReadFile(filePath)
}
// IsTextFile returns true for plain text files
func IsTextFile(ext string) bool {
textExts := map[string]bool{
@ -159,12 +167,18 @@ func AnalyzeWithVision(imageData []byte) (*DocumentAnalysis, error) {
- vendor: Company/organization name
- amount: Dollar/currency amount if present (e.g., "$123.45" or "809,400 BYN")
5. **Title**: SHORT English title (max 6-8 words), e.g. "Apple Store Mac Mini Receipt" or "Electric Bill March 2025"
5. **Title**: Specific English title (max 8 words) that identifies THIS document uniquely. Ask yourself: "If I had to find this document again, what would I search for?" Prioritize the most unique/recoverable detail: an account number, confirmation code, ID, or specific value that can't be inferred from context. If the document's main value is a number or ID, put it in the title. Bad: "FedEx Receipt Jan 2026" (generic). Good: "FedEx Account 203634010" or "Duke Energy Electric Bill Oct 2024" or "IRS Form 1099-INT Chase Bank 2024". Never use generic words like "Document", "Letter", "Report" alone always qualify them.
6. **Summary**: 1-2 sentence English description with key details.
IMPORTANT: The full_text field MUST contain the verbatim transcription in the document's original language. This is non-negotiable.
**Known proper nouns** (use these exact spellings when you see similar handwriting):
- Jongsma (surname may look like "Jongoma", "Jongsoma", "Jongma")
- Johan (first name)
- Tatyana / Tanya (first name)
- St. Petersburg, Florida (city not Russia)
Respond in JSON ONLY:
{"category": "...", "doc_type": "...", "date": "...", "vendor": "...", "amount": "...", "title": "...", "summary": "...", "full_text": "..."}`
@ -227,12 +241,20 @@ func AnalyzeText(text, filename string) (*DocumentAnalysis, error) {
Categorize into ONE of: taxes, bills, medical, insurance, legal, financial, expenses, vehicles, home, personal, contacts, uncategorized
For the title: specific and concise (max 8 words). Ask yourself: "What would I search for to find this again?" Prioritize unique/recoverable details account numbers, IDs, confirmation codes over generic document type names. Examples:
- "FedEx Account 203634010" (if the key value is an account number)
- "Allegiant Confirmation IB2EJA AVL-PIE Feb 2026"
- "IRS Form 1099-INT Chase Bank 2024"
Bad: "FedEx Receipt Jan 2026" or "Flight Confirmation" too generic.
Also produce a "full_text" field: reformat the raw content as clean Markdown. Use ## headers for sections, | tables | for tabular data, **bold** for field labels, and bullet lists where appropriate. Preserve all values exactly. Do not summarize include everything.
Respond in JSON ONLY:
{"category": "...", "doc_type": "...", "date": "...", "vendor": "...", "amount": "...", "summary": "..."}`, filename, text)
{"category": "...", "doc_type": "...", "date": "...", "vendor": "...", "amount": "...", "title": "...", "summary": "...", "full_text": "..."}`, filename, text)
reqBody := map[string]interface{}{
"model": "accounts/fireworks/models/kimi-k2-instruct-0905",
"max_tokens": 1024,
"max_tokens": 2048,
"messages": []map[string]interface{}{
{"role": "user", "content": prompt},
},
@ -242,7 +264,10 @@ Respond in JSON ONLY:
if err != nil {
return nil, err
}
analysis.FullText = text
// If model didn't produce full_text, fall back to raw extracted text
if analysis.FullText == "" {
analysis.FullText = text
}
return analysis, nil
}
@ -464,8 +489,13 @@ func ProcessPDFPageByPage(filePath string, jobID string) (string, error) {
continue
}
pngPath := filepath.Join(tmpDir, fmt.Sprintf("page-%d.png", page))
imageData, err := os.ReadFile(pngPath)
// Glob for the output — pdftoppm zero-pads based on total page count
pageMatches, _ := filepath.Glob(filepath.Join(tmpDir, "page-*.png"))
if len(pageMatches) == 0 {
os.RemoveAll(tmpDir)
continue
}
imageData, err := os.ReadFile(pageMatches[0])
os.RemoveAll(tmpDir)
if err != nil {
continue
@ -611,6 +641,22 @@ func ProcessDocument(filePath string) (*Document, error) {
UpdateJob(hash, "error", err.Error())
return nil, fmt.Errorf("text analysis failed: %w", err)
}
} else if IsOfficeFile(ext) {
// Office formats — extract text natively from ZIP/XML, no LibreOffice needed
UpdateJob(hash, "converting", "Extracting text...")
log.Printf(" Extracting text from %s...", ext)
text, err := ExtractOfficeText(filePath)
if err != nil {
UpdateJob(hash, "error", err.Error())
return nil, fmt.Errorf("office text extraction failed: %w", err)
}
log.Printf(" Extracted %d chars, classifying...", len(text))
UpdateJob(hash, "classifying", "Classifying...")
analysis, err = AnalyzeText(text, filepath.Base(filePath))
if err != nil {
UpdateJob(hash, "error", err.Error())
return nil, fmt.Errorf("text analysis failed: %w", err)
}
} else {
// Vision — convert to image and analyze
UpdateJob(hash, "converting", "Converting to image...")
@ -676,10 +722,18 @@ func ProcessDocument(filePath string) (*Document, error) {
log.Printf(" Category: %s, Type: %s", analysis.Category, analysis.DocType)
// Copy to store
// Copy to store (skip if already there — reprocessing from store-backed upload)
storePath := filepath.Join(storeDir, hash+ext)
if err := copyFile(filePath, storePath); err != nil {
return nil, fmt.Errorf("store copy failed: %w", err)
if _, statErr := os.Stat(storePath); os.IsNotExist(statErr) {
if err := copyFile(filePath, storePath); err != nil {
return nil, fmt.Errorf("store copy failed: %w", err)
}
}
// Only set PDFPath for actual PDFs — office/text files have no previewable PDF
pdfStorePath := ""
if ext == ".pdf" {
pdfStorePath = storePath
}
// Create document record
@ -688,7 +742,7 @@ func ProcessDocument(filePath string) (*Document, error) {
if title == "" {
title = analysis.Summary
}
doc := &Document{
ID: hash,
Title: title,
@ -699,7 +753,7 @@ func ProcessDocument(filePath string) (*Document, error) {
Vendor: analysis.Vendor,
Summary: analysis.Summary,
FullText: analysis.FullText,
PDFPath: storePath,
PDFPath: pdfStorePath,
OriginalFile: filepath.Base(filePath),
ProcessedAt: time.Now().Format(time.RFC3339),
Status: "ready",
@ -745,3 +799,211 @@ func copyFile(src, dst string) error {
_, err = io.Copy(out, in)
return err
}
// ExtractOfficeText extracts plain text from OOXML (DOCX/XLSX/PPTX) and
// OpenDocument (ODT/ODS/ODP) files natively. These formats are ZIP archives
// containing XML, so no LibreOffice round-trip is needed.
//
// Returns an error for non-ZIP inputs (legacy .doc/.xls/.ppt binaries, .rtf),
// unknown extensions, and archives that yield no text.
func ExtractOfficeText(filePath string) (string, error) {
	ext := strings.ToLower(filepath.Ext(filePath))
	r, err := zip.OpenReader(filePath)
	if err != nil {
		return "", fmt.Errorf("not a valid Office file: %w", err)
	}
	defer r.Close()

	// Which XML paths inside the archive to extract, per format.
	var targets []string
	switch ext {
	case ".docx", ".doc", ".rtf":
		// Word text body. (.doc/.rtf are not ZIP archives and will already
		// have failed zip.OpenReader above; kept here for historical reasons.)
		targets = []string{"word/document.xml", "word/body.xml"}
	case ".odt", ".ods", ".odp":
		// OpenDocument formats keep all document content in content.xml.
		// BUG FIX: .odt previously looked for word/document.xml (an OOXML
		// path) and always yielded "no text extracted"; .ods/.odp fell
		// through to "unsupported" even though IsOfficeFile accepts them.
		targets = []string{"content.xml"}
	case ".xlsx", ".xls":
		// All worksheets...
		for _, f := range r.File {
			if strings.HasPrefix(f.Name, "xl/worksheets/sheet") && strings.HasSuffix(f.Name, ".xml") {
				targets = append(targets, f.Name)
			}
		}
		// ...plus shared strings (xlsx stores most cell text there).
		targets = append(targets, "xl/sharedStrings.xml")
	case ".pptx", ".ppt":
		for _, f := range r.File {
			if strings.HasPrefix(f.Name, "ppt/slides/slide") && strings.HasSuffix(f.Name, ".xml") {
				targets = append(targets, f.Name)
			}
		}
	default:
		return "", fmt.Errorf("unsupported office format: %s", ext)
	}

	// Set for O(1) membership tests while walking the archive entries.
	wanted := make(map[string]bool, len(targets))
	for _, t := range targets {
		wanted[t] = true
	}

	var sb strings.Builder
	for _, f := range r.File {
		if !wanted[f.Name] {
			continue
		}
		rc, err := f.Open()
		if err != nil {
			continue // best-effort: skip unreadable entries
		}
		data, err := io.ReadAll(rc)
		rc.Close()
		if err != nil {
			continue
		}
		sb.WriteString(xmlToText(data))
		sb.WriteString("\n")
	}

	text := strings.TrimSpace(sb.String())
	if text == "" {
		return "", fmt.Errorf("no text extracted from %s", filepath.Base(filePath))
	}
	return text, nil
}
// xmlToText flattens DOCX/Office XML into structured plain text.
// It understands paragraph breaks (p), table rows/cells (tr/tc) and explicit
// line breaks (br); namespace prefixes are stripped (w:p -> p, a:p -> p) so
// the same scanner works for Word, PowerPoint and ODF markup.
//
// Note: end tags such as </w:p> reduce to an empty local name in the scanner
// (the "/" cuts the name at index 0) and are deliberately ignored — only
// opening/self-closing tags drive the layout. This matches the original
// behavior and keeps cell tab-separation correct.
func xmlToText(data []byte) string {
	s := string(data)

	// Pass 1: scan the raw bytes into a stream of (tag, text) tokens.
	type token struct {
		tag  string // lowercased local name, or "" for text
		text string
	}
	var tokens []token
	var sb strings.Builder     // accumulates text between tags
	var tagBuf strings.Builder // accumulates bytes inside <...>
	inTag := false
	for i := 0; i < len(s); i++ {
		c := s[i]
		if c == '<' {
			// Flush any pending text before the tag starts.
			if sb.Len() > 0 {
				tokens = append(tokens, token{text: sb.String()})
				sb.Reset()
			}
			inTag = true
			tagBuf.Reset()
			continue
		}
		if c == '>' {
			inTag = false
			raw := strings.TrimSpace(tagBuf.String())
			// Local name: cut at the first whitespace or '/', then drop the
			// namespace prefix. End tags ("/w:p") yield "" — see doc comment.
			name := raw
			if idx := strings.IndexAny(raw, " \t\n\r/"); idx >= 0 {
				name = raw[:idx]
			}
			if idx := strings.Index(name, ":"); idx >= 0 {
				name = name[idx+1:]
			}
			tokens = append(tokens, token{tag: strings.ToLower(name)})
			tagBuf.Reset()
			continue
		}
		if inTag {
			tagBuf.WriteByte(c)
		} else {
			sb.WriteByte(c)
		}
	}
	if sb.Len() > 0 {
		tokens = append(tokens, token{text: sb.String()})
	}

	// Pass 2: render tokens into text. Break requests collapse: pendingNewlines
	// holds the maximum requested and is emitted lazily before the next
	// non-empty text node, so output never starts or ends with breaks.
	var out strings.Builder
	pendingNewlines := 0
	emitNewlines := func(n int) {
		if n > pendingNewlines {
			pendingNewlines = n
		}
	}
	flushNewlines := func() {
		for i := 0; i < pendingNewlines; i++ {
			out.WriteByte('\n')
		}
		pendingNewlines = 0
	}
	inCell := false
	for _, tok := range tokens {
		if tok.tag == "" {
			// Text node: decode the XML entities we expect in Office files.
			// BUG FIX: "&amp;" must be decoded LAST — decoding it first made
			// already-escaped sequences double-decode (e.g. "&amp;lt;" became
			// "<" instead of the literal "&lt;").
			txt := tok.text
			txt = strings.ReplaceAll(txt, "&lt;", "<")
			txt = strings.ReplaceAll(txt, "&gt;", ">")
			txt = strings.ReplaceAll(txt, "&quot;", "\"")
			txt = strings.ReplaceAll(txt, "&apos;", "'")
			txt = strings.ReplaceAll(txt, "&#xA;", "\n")
			txt = strings.ReplaceAll(txt, "&#x9;", "\t")
			txt = strings.ReplaceAll(txt, "&amp;", "&")
			if strings.TrimSpace(txt) != "" {
				flushNewlines()
				out.WriteString(txt)
			}
			continue
		}
		switch tok.tag {
		case "p", "br": // paragraph / explicit line break
			emitNewlines(1)
		case "tr": // table row: new line; first cell in the row gets no tab
			inCell = false
			emitNewlines(1)
		case "tc": // table cell: tab-separate cells after the first
			if inCell {
				out.WriteString("\t")
			}
			inCell = true
		}
	}
	return strings.TrimSpace(out.String())
}
// IsOfficeFile reports whether ext (a lowercase extension including the dot,
// e.g. ".docx") is a ZIP-based Office format with extractable XML text.
// Legacy binary formats (.doc, .xls, .ppt) are intentionally excluded.
func IsOfficeFile(ext string) bool {
	// A switch avoids rebuilding a throwaway map on every call.
	switch ext {
	case ".docx", ".xlsx", ".pptx", ".odt", ".ods", ".odp":
		return true
	}
	return false
}

891
ai.go.bak-20260228-005328 Normal file
View File

@ -0,0 +1,891 @@
package main
import (
"archive/zip"
"bytes"
"crypto/sha256"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
)
// Fireworks API configuration. The key is resolved once at startup by init.
var (
	fireworksAPIKey  string
	fireworksBaseURL = "https://api.fireworks.ai/inference/v1"
)

// init resolves the Fireworks API key: first from the FIREWORKS_API_KEY
// environment variable, then from a KEY=VALUE line in ~/dev/docsys/.env
// (values may be wrapped in single or double quotes).
func init() {
	fireworksAPIKey = os.Getenv("FIREWORKS_API_KEY")
	if fireworksAPIKey != "" {
		return
	}
	// Fall back to the .env file in the docsys directory.
	envPath := filepath.Join(os.Getenv("HOME"), "dev/docsys/.env")
	data, err := os.ReadFile(envPath)
	if err != nil {
		return
	}
	const prefix = "FIREWORKS_API_KEY="
	for _, line := range strings.Split(string(data), "\n") {
		if !strings.HasPrefix(line, prefix) {
			continue
		}
		// Trim whitespace, then any surrounding quotes.
		val := strings.TrimSpace(strings.TrimPrefix(line, prefix))
		fireworksAPIKey = strings.Trim(val, `"'`)
		return
	}
}
// DocumentAnalysis contains the AI-extracted information returned by the
// Fireworks models for a single document.
type DocumentAnalysis struct {
	Category string      `json:"category"`
	DocType  string      `json:"doc_type"`
	Date     string      `json:"date"`
	Vendor   string      `json:"vendor"`
	Amount   interface{} `json:"amount"` // Can be string or number
	Title    string      `json:"title"`
	Summary  string      `json:"summary"`
	FullText string      `json:"full_text"`
}

// AmountString renders the Amount field as display text: strings pass
// through unchanged, JSON numbers (float64) are formatted as dollar amounts,
// and anything else (including nil) yields the empty string.
func (d *DocumentAnalysis) AmountString() string {
	if s, ok := d.Amount.(string); ok {
		return s
	}
	if f, ok := d.Amount.(float64); ok {
		return fmt.Sprintf("$%.2f", f)
	}
	return ""
}
// FileHash returns first 16 chars of SHA256 hash
func FileHash(filepath string) (string, error) {
f, err := os.Open(filepath)
if err != nil {
return "", err
}
defer f.Close()
h := sha256.New()
if _, err := io.Copy(h, f); err != nil {
return "", err
}
return fmt.Sprintf("%x", h.Sum(nil))[:16], nil
}
// ConvertToImage converts PDF/Office docs to PNG for vision API
func ConvertToImage(filePath string) ([]byte, error) {
ext := strings.ToLower(filepath.Ext(filePath))
// Office documents → PDF first
officeExts := map[string]bool{".doc": true, ".docx": true, ".odt": true, ".rtf": true, ".xls": true, ".xlsx": true, ".ppt": true, ".pptx": true}
if officeExts[ext] {
tmpDir, err := os.MkdirTemp("", "docsys")
if err != nil {
return nil, err
}
defer os.RemoveAll(tmpDir)
cmd := exec.Command("libreoffice", "--headless", "--convert-to", "pdf", "--outdir", tmpDir, filePath)
if err := cmd.Run(); err != nil {
return nil, fmt.Errorf("libreoffice conversion failed: %w", err)
}
base := strings.TrimSuffix(filepath.Base(filePath), ext)
pdfPath := filepath.Join(tmpDir, base+".pdf")
filePath = pdfPath
ext = ".pdf"
}
// PDF → PNG (first page only for preview, full processing done separately)
if ext == ".pdf" {
tmpDir, err := os.MkdirTemp("", "docsys")
if err != nil {
return nil, err
}
defer os.RemoveAll(tmpDir)
// Convert first page for initial analysis
outPrefix := filepath.Join(tmpDir, "page")
cmd := exec.Command("pdftoppm", "-png", "-f", "1", "-l", "1", "-r", "150", filePath, outPrefix)
if err := cmd.Run(); err != nil {
return nil, fmt.Errorf("pdftoppm failed: %w", err)
}
pngPath := filepath.Join(tmpDir, "page-1.png")
return os.ReadFile(pngPath)
}
// Image files — read directly
return os.ReadFile(filePath)
}
// IsTextFile reports whether ext (lowercase, with dot) is a plain-text
// format that can be read and analyzed directly, without OCR or conversion.
func IsTextFile(ext string) bool {
	// A switch avoids allocating a fresh map on every call.
	switch ext {
	case ".txt", ".md", ".markdown", ".text", ".log",
		".json", ".xml", ".csv", ".yaml", ".yml":
		return true
	}
	return false
}
// AnalyzeWithVision sends a document image to the Fireworks vision model and
// parses the structured JSON analysis it returns (category, key fields,
// title, summary, verbatim full-text transcription). On a first-attempt
// failure it retries once with a stripped-down prompt before returning.
// NOTE(review): the original comment said "K2.5" but the model id used below
// is qwen3-vl-30b-a3b-instruct — confirm which is intended.
func AnalyzeWithVision(imageData []byte) (*DocumentAnalysis, error) {
if fireworksAPIKey == "" {
return nil, fmt.Errorf("FIREWORKS_API_KEY not set")
}
// The image is inlined into the request as a base64 data: URL.
b64 := base64.StdEncoding.EncodeToString(imageData)
prompt := `Analyze this document image and extract:
1. **Full Text**: Transcribe ALL visible text EXACTLY as it appears — in the ORIGINAL language of the document. DO NOT translate. DO NOT summarize. Transcribe word-for-word in the source language (Russian, Dutch, German, French, etc.) formatted as clean Markdown:
- Use headers (##) for sections
- Use **bold** for labels/field names
- Use tables for tabular data (items, prices, etc.)
- Use bullet lists where appropriate
- Preserve ALL numbers, dates, amounts, and codes exactly as shown
2. **Classification**: Categorize into exactly ONE of:
taxes, bills, medical, insurance, legal, financial, expenses, vehicles, home, personal, contacts, uncategorized
3. **Document Type**: Specific type (e.g., "utility_bill", "receipt", "tax_form_w2")
4. **Key Fields** (these may be in English for searchability):
- date: Document date (YYYY-MM-DD if possible)
- vendor: Company/organization name
- amount: Dollar/currency amount if present (e.g., "$123.45" or "809,400 BYN")
5. **Title**: Specific English title (max 8 words) that identifies THIS document uniquely. Include the key distinguishing details: who sent it, what it's about, and when. Bad: "Financial Report" or "Invoice". Good: "N-able Technology Exchange Rate Loss Explanation Feb 2025" or "Duke Energy Electric Bill Oct 2024" or "IRS Form 1099-INT Chase Bank 2024" or "BayCare HomeCare Invoice $340 Nov 2025". Never use generic words like "Document", "Letter", "Report" alone — always qualify them.
6. **Summary**: 1-2 sentence English description with key details.
IMPORTANT: The full_text field MUST contain the verbatim transcription in the document's original language. This is non-negotiable.
**Known proper nouns** (use these exact spellings when you see similar handwriting):
- Jongsma (surname — may look like "Jongoma", "Jongsoma", "Jongma")
- Johan (first name)
- Tatyana / Tanya (first name)
- St. Petersburg, Florida (city — not Russia)
Respond in JSON ONLY:
{"category": "...", "doc_type": "...", "date": "...", "vendor": "...", "amount": "...", "title": "...", "summary": "...", "full_text": "..."}`
// Primary request: the system message pins the model to raw-JSON output.
reqBody := map[string]interface{}{
"model": "accounts/fireworks/models/qwen3-vl-30b-a3b-instruct",
"max_tokens": 4096,
"messages": []map[string]interface{}{
{"role": "system", "content": "You are a document analysis API. Output ONLY raw JSON. No thinking, no commentary, no code fences. First character must be {, last character must be }."},
{
"role": "user",
"content": []map[string]interface{}{
{"type": "image_url", "image_url": map[string]string{"url": "data:image/png;base64," + b64}},
{"type": "text", "text": prompt},
},
},
},
}
analysis, err := callFireworks(reqBody)
if err != nil {
// Retry once with minimal prompt to avoid triggering extended reasoning
log.Printf(" [AI] First attempt failed, retrying with simplified prompt...")
retryBody := map[string]interface{}{
"model": "accounts/fireworks/models/qwen3-vl-30b-a3b-instruct",
"max_tokens": 4096,
"messages": []map[string]interface{}{
{"role": "system", "content": "Output valid JSON only. No other text."},
{
"role": "user",
"content": []map[string]interface{}{
{"type": "image_url", "image_url": map[string]string{"url": "data:image/png;base64," + b64}},
{"type": "text", "text": `Look at this document image. Transcribe ALL text verbatim (original language, do NOT translate). Return ONLY this JSON with no placeholders:
{"category":"","doc_type":"","date":"","vendor":"","amount":"","title":"","summary":"","full_text":""}`},
},
},
},
}
return callFireworks(retryBody)
}
return analysis, nil
}
// AnalyzeText classifies a plain-text document with the Fireworks K2 text
// model (kimi-k2-instruct-0905) and returns the parsed analysis with the
// original text attached as FullText.
// NOTE(review): the JSON template below requests no "title"/"full_text"
// keys, and FullText is unconditionally overwritten with the raw input after
// the call — any full_text the model produced is discarded. Confirm intended.
func AnalyzeText(text, filename string) (*DocumentAnalysis, error) {
if fireworksAPIKey == "" {
return nil, fmt.Errorf("FIREWORKS_API_KEY not set")
}
// Truncate long text
// (byte-based cut; may split a multi-byte UTF-8 rune at the boundary)
if len(text) > 50000 {
text = text[:50000]
}
prompt := fmt.Sprintf(`Analyze this document:
**Filename:** %s
**Content:**
%s
Categorize into ONE of: taxes, bills, medical, insurance, legal, financial, expenses, vehicles, home, personal, contacts, uncategorized
Respond in JSON ONLY:
{"category": "...", "doc_type": "...", "date": "...", "vendor": "...", "amount": "...", "summary": "..."}`, filename, text)
reqBody := map[string]interface{}{
"model": "accounts/fireworks/models/kimi-k2-instruct-0905",
"max_tokens": 1024,
"messages": []map[string]interface{}{
{"role": "user", "content": prompt},
},
}
analysis, err := callFireworks(reqBody)
if err != nil {
return nil, err
}
// Keep the raw extracted text as the document body.
analysis.FullText = text
return analysis, nil
}
// callFireworks POSTs a chat-completion request to the Fireworks API and
// parses the model's reply into a DocumentAnalysis. It tolerates several
// model quirks: JSON placed in reasoning_content instead of content,
// markdown code fences around the JSON, and prose before/after the object.
// Unknown categories are coerced to "uncategorized".
func callFireworks(reqBody map[string]interface{}) (*DocumentAnalysis, error) {
// Marshal/NewRequest errors ignored: reqBody is built from in-process literals.
jsonBody, _ := json.Marshal(reqBody)
req, _ := http.NewRequest("POST", fireworksBaseURL+"/chat/completions", bytes.NewReader(jsonBody))
req.Header.Set("Authorization", "Bearer "+fireworksAPIKey)
req.Header.Set("Content-Type", "application/json")
client := &http.Client{Timeout: 120 * time.Second}
resp, err := client.Do(req)
if err != nil {
return nil, err
}
defer resp.Body.Close()
if resp.StatusCode != 200 {
body, _ := io.ReadAll(resp.Body)
return nil, fmt.Errorf("API error %d: %s", resp.StatusCode, string(body))
}
respBody, _ := io.ReadAll(resp.Body)
var result struct {
Choices []struct {
Message struct {
Content string `json:"content"`
ReasoningContent string `json:"reasoning_content"`
} `json:"message"`
} `json:"choices"`
}
if err := json.Unmarshal(respBody, &result); err != nil {
return nil, err
}
if len(result.Choices) == 0 {
return nil, fmt.Errorf("no response from API")
}
content := result.Choices[0].Message.Content
reasoning := result.Choices[0].Message.ReasoningContent
// K2.5 reasoning mode: actual JSON may be in content or reasoning_content
// Try content first, if it doesn't look like JSON, try reasoning_content
if !strings.Contains(content, "{") && reasoning != "" && strings.Contains(reasoning, "{") {
log.Printf(" [AI] Using reasoning_content (content had no JSON)")
content = reasoning
}
// Strip markdown code fences (```json ... ``` or ``` ... ```)
content = strings.TrimSpace(content)
if strings.HasPrefix(content, "```") {
// Remove opening fence (```json or ```)
if idx := strings.Index(content, "\n"); idx >= 0 {
content = content[idx+1:]
}
// Remove closing fence
if idx := strings.LastIndex(content, "```"); idx >= 0 {
content = content[:idx]
}
content = strings.TrimSpace(content)
}
// Extract JSON from response
// (trim any prose before the first '{' and after the last '}')
if idx := strings.Index(content, "{"); idx >= 0 {
if end := strings.LastIndex(content, "}"); end > idx {
content = content[idx : end+1]
}
}
var analysis DocumentAnalysis
if err := json.Unmarshal([]byte(content), &analysis); err != nil {
// Last resort: try to find a JSON object with braces matching
cleaned := extractJSONObject(content)
if cleaned != "" {
if err2 := json.Unmarshal([]byte(cleaned), &analysis); err2 != nil {
log.Printf(" [AI debug] Failed to parse even after cleanup. Content starts: %.200s", content)
return nil, fmt.Errorf("failed to parse response: %w", err)
}
} else {
log.Printf(" [AI debug] No JSON object found in response. Content starts: %.200s", content)
return nil, fmt.Errorf("failed to parse response: %w", err)
}
}
// Validate category
// (anything outside the known set is coerced to "uncategorized")
validCats := map[string]bool{"taxes": true, "bills": true, "medical": true, "insurance": true, "legal": true, "financial": true, "expenses": true, "vehicles": true, "home": true, "personal": true, "contacts": true, "uncategorized": true}
if !validCats[analysis.Category] {
analysis.Category = "uncategorized"
}
return &analysis, nil
}
// extractJSONObject returns the first balanced top-level JSON object found
// in s, honoring string literals and backslash escapes, or "" when no
// complete object exists.
func extractJSONObject(s string) string {
	start := strings.Index(s, "{")
	if start < 0 {
		return ""
	}
	var (
		depth    int
		inString bool
		escaped  bool
	)
	for i := start; i < len(s); i++ {
		switch c := s[i]; {
		case escaped:
			// This byte was escaped — consume it without interpretation.
			escaped = false
		case c == '\\' && inString:
			escaped = true
		case c == '"':
			inString = !inString
		case inString:
			// Structural characters inside string literals don't count.
		case c == '{':
			depth++
		case c == '}':
			depth--
			if depth == 0 {
				return s[start : i+1]
			}
		}
	}
	// Braces never balanced — no complete object.
	return ""
}
// GenerateEmbedding creates a vector embedding for text via the Fireworks
// embeddings endpoint (qwen3-embedding-8b) and returns the first embedding
// vector from the response.
func GenerateEmbedding(text string) ([]float32, error) {
if fireworksAPIKey == "" {
return nil, fmt.Errorf("FIREWORKS_API_KEY not set")
}
// Truncate
// (byte-based cut; may split a multi-byte UTF-8 rune at the boundary)
if len(text) > 32000 {
text = text[:32000]
}
reqBody := map[string]interface{}{
"model": "fireworks/qwen3-embedding-8b",
"input": text,
}
// Marshal/NewRequest errors ignored: the body is built from literals above.
jsonBody, _ := json.Marshal(reqBody)
req, _ := http.NewRequest("POST", fireworksBaseURL+"/embeddings", bytes.NewReader(jsonBody))
req.Header.Set("Authorization", "Bearer "+fireworksAPIKey)
req.Header.Set("Content-Type", "application/json")
client := &http.Client{Timeout: 30 * time.Second}
resp, err := client.Do(req)
if err != nil {
return nil, err
}
defer resp.Body.Close()
if resp.StatusCode != 200 {
body, _ := io.ReadAll(resp.Body)
return nil, fmt.Errorf("embedding API error %d: %s", resp.StatusCode, string(body))
}
var result struct {
Data []struct {
Embedding []float32 `json:"embedding"`
} `json:"data"`
}
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
return nil, err
}
if len(result.Data) == 0 {
return nil, fmt.Errorf("no embedding returned")
}
return result.Data[0].Embedding, nil
}
// GetPDFPageCount returns the page count reported by pdfinfo for the given
// PDF. It falls back to 1 when pdfinfo fails or its output lacks a
// "Pages:" line (so callers always get a usable positive count... unless
// the Pages line itself fails to parse, in which case 0 is returned —
// matching the original behavior).
func GetPDFPageCount(filePath string) int {
	out, err := exec.Command("pdfinfo", filePath).Output()
	if err != nil {
		return 1
	}
	for _, line := range strings.Split(string(out), "\n") {
		if !strings.HasPrefix(line, "Pages:") {
			continue
		}
		var count int
		fmt.Sscanf(line, "Pages: %d", &count)
		return count
	}
	return 1
}
// ProcessPDFPageByPage OCRs a PDF one page at a time, concatenating the
// per-page markdown with "## Page N" separators. Pages that fail to convert
// or OCR are skipped (best-effort); jobID is used for progress reporting.
func ProcessPDFPageByPage(filePath string, jobID string) (string, error) {
	pageCount := GetPDFPageCount(filePath)
	log.Printf(" Processing %d pages separately...", pageCount)
	var allText strings.Builder
	for page := 1; page <= pageCount; page++ {
		UpdateJob(jobID, "ocr", fmt.Sprintf("Page %d/%d", page, pageCount))
		tmpDir, err := os.MkdirTemp("", "docsys-page")
		if err != nil {
			continue
		}
		// Convert this single page to PNG at 150 dpi.
		outPrefix := filepath.Join(tmpDir, "page")
		cmd := exec.Command("pdftoppm", "-png", "-f", fmt.Sprintf("%d", page), "-l", fmt.Sprintf("%d", page), "-r", "150", filePath, outPrefix)
		if err := cmd.Run(); err != nil {
			os.RemoveAll(tmpDir)
			continue
		}
		// BUG FIX: pdftoppm zero-pads the page number based on the document's
		// total page count (page-1.png / page-01.png / page-001.png), so the
		// previous hardcoded fmt.Sprintf("page-%d.png", page) missed the file
		// for any multi-page document. Glob for the single produced file.
		pageMatches, _ := filepath.Glob(filepath.Join(tmpDir, "page-*.png"))
		if len(pageMatches) == 0 {
			os.RemoveAll(tmpDir)
			continue
		}
		imageData, err := os.ReadFile(pageMatches[0])
		os.RemoveAll(tmpDir)
		if err != nil {
			continue
		}
		// OCR this page with the vision model.
		log.Printf(" Page %d/%d...", page, pageCount)
		pageAnalysis, err := AnalyzePageOnly(imageData, page)
		if err != nil {
			log.Printf(" Page %d failed: %v", page, err)
			continue
		}
		if pageAnalysis != "" {
			allText.WriteString(fmt.Sprintf("\n\n---\n## Page %d\n\n", page))
			allText.WriteString(pageAnalysis)
		}
	}
	return allText.String(), nil
}
// AnalyzePageOnly OCRs a single page image with the Fireworks vision model
// and returns only the transcribed text (no classification). pageNum is
// accepted for symmetry with the caller but is not used in the request.
func AnalyzePageOnly(imageData []byte, pageNum int) (string, error) {
if fireworksAPIKey == "" {
return "", fmt.Errorf("FIREWORKS_API_KEY not set")
}
// The image is inlined into the request as a base64 data: URL.
b64 := base64.StdEncoding.EncodeToString(imageData)
prompt := `Transcribe ALL visible text on this page EXACTLY as it appears, in the ORIGINAL language of the document. DO NOT translate. DO NOT summarize. Output ONLY the transcribed text — no commentary, no analysis, no preamble, no "The document is..." sentences. Start directly with the content.
FORMAT: Use ### for sections, **bold** for labels, markdown tables for tabular data, - bullets for lists. Preserve ALL numbers, dates, amounts, and values exactly as shown. If the document is in Russian, Dutch, German, French, or any other language — keep it in that language.`
reqBody := map[string]interface{}{
"model": "accounts/fireworks/models/qwen3-vl-30b-a3b-instruct",
"max_tokens": 4096,
"messages": []map[string]interface{}{
{
"role": "user",
"content": []map[string]interface{}{
{"type": "image_url", "image_url": map[string]string{"url": "data:image/png;base64," + b64}},
{"type": "text", "text": prompt},
},
},
},
}
// Marshal/NewRequest errors ignored: the body is built from literals above.
jsonBody, _ := json.Marshal(reqBody)
req, _ := http.NewRequest("POST", fireworksBaseURL+"/chat/completions", bytes.NewReader(jsonBody))
req.Header.Set("Authorization", "Bearer "+fireworksAPIKey)
req.Header.Set("Content-Type", "application/json")
client := &http.Client{Timeout: 120 * time.Second}
resp, err := client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != 200 {
body, _ := io.ReadAll(resp.Body)
return "", fmt.Errorf("API error %d: %s", resp.StatusCode, string(body))
}
// Read raw response to debug content vs reasoning_content
rawBody, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
var result struct {
Choices []struct {
Message struct {
Content string `json:"content"`
ReasoningContent string `json:"reasoning_content"`
} `json:"message"`
} `json:"choices"`
}
if err := json.Unmarshal(rawBody, &result); err != nil {
return "", err
}
if len(result.Choices) == 0 {
return "", fmt.Errorf("no response")
}
content := result.Choices[0].Message.Content
reasoning := result.Choices[0].Message.ReasoningContent
if reasoning != "" {
log.Printf(" [OCR debug] reasoning_content length: %d, content length: %d", len(reasoning), len(content))
if len(content) > 100 {
log.Printf(" [OCR debug] content starts: %.100s", content)
}
}
// If content is empty but reasoning has text, model put everything in wrong field
if strings.TrimSpace(content) == "" && reasoning != "" {
log.Printf(" [OCR debug] WARNING: content empty, using reasoning_content")
content = reasoning
}
return strings.TrimSpace(content), nil
}
// ProcessDocument handles the full document processing pipeline.
//
// Steps: hash the file (the hash doubles as document ID and store filename),
// skip if a "ready" record already exists, extract text (plain text, Office
// XML, or vision OCR depending on extension), classify, copy the original
// into the content-addressed store, insert the DB record, generate a search
// embedding (best-effort), and finally remove the inbox file.
func ProcessDocument(filePath string) (*Document, error) {
	log.Printf("Processing: %s", filepath.Base(filePath))
	ext := strings.ToLower(filepath.Ext(filePath))
	// Get file hash
	hash, err := FileHash(filePath)
	if err != nil {
		return nil, fmt.Errorf("hash failed: %w", err)
	}
	log.Printf(" Hash: %s", hash)
	// Start progress tracking
	StartJob(hash, filepath.Base(filePath))
	defer FinishJob(hash)
	// Check if already fully processed (not pending)
	if existing, _ := GetDocument(hash); existing != nil && existing.Status == "ready" {
		log.Printf(" Already exists, skipping")
		os.Remove(filePath)
		return existing, nil
	}
	var analysis *DocumentAnalysis
	if IsTextFile(ext) {
		// Plain text — read and analyze
		data, err := os.ReadFile(filePath)
		if err != nil {
			return nil, err
		}
		UpdateJob(hash, "classifying", "Analyzing text...")
		log.Printf(" Analyzing text with K2...")
		analysis, err = AnalyzeText(string(data), filepath.Base(filePath))
		if err != nil {
			UpdateJob(hash, "error", err.Error())
			return nil, fmt.Errorf("text analysis failed: %w", err)
		}
	} else if IsOfficeFile(ext) {
		// Office formats — extract text natively from ZIP/XML, no LibreOffice needed
		UpdateJob(hash, "converting", "Extracting text...")
		log.Printf(" Extracting text from %s...", ext)
		text, err := ExtractOfficeText(filePath)
		if err != nil {
			UpdateJob(hash, "error", err.Error())
			return nil, fmt.Errorf("office text extraction failed: %w", err)
		}
		log.Printf(" Extracted %d chars, classifying...", len(text))
		UpdateJob(hash, "classifying", "Classifying...")
		analysis, err = AnalyzeText(text, filepath.Base(filePath))
		if err != nil {
			UpdateJob(hash, "error", err.Error())
			return nil, fmt.Errorf("text analysis failed: %w", err)
		}
	} else {
		// Vision — convert to image and analyze
		UpdateJob(hash, "converting", "Converting to image...")
		log.Printf(" Converting to image...")
		imageData, err := ConvertToImage(filePath)
		if err != nil {
			UpdateJob(hash, "error", err.Error())
			return nil, fmt.Errorf("image conversion failed: %w", err)
		}
		UpdateJob(hash, "ocr", "Analyzing first page...")
		log.Printf(" Analyzing with K2.5 vision...")
		analysis, err = AnalyzeWithVision(imageData)
		if err != nil {
			// Vision JSON extraction failed — fall back to two-step: plain-text OCR + text classifier
			log.Printf(" AnalyzeWithVision failed (%v), falling back to OCR+classify...", err)
			UpdateJob(hash, "ocr", "Falling back to OCR + classify...")
			pageText, ocrErr := AnalyzePageOnly(imageData, 1)
			if ocrErr != nil {
				UpdateJob(hash, "error", ocrErr.Error())
				return nil, fmt.Errorf("vision analysis failed (primary: %v, fallback: %w)", err, ocrErr)
			}
			// Classify the extracted text
			log.Printf(" OCR succeeded (%d chars), classifying...", len(pageText))
			UpdateJob(hash, "classifying", "Classifying extracted text...")
			analysis, err = AnalyzeText(pageText, filepath.Base(filePath))
			if err != nil {
				// Use minimal stub so at least the doc is stored with its text
				log.Printf(" Classification failed too: %v — storing with minimal metadata", err)
				analysis = &DocumentAnalysis{
					Category: "uncategorized",
					DocType:  "unknown",
					Title:    strings.TrimSuffix(filepath.Base(filePath), filepath.Ext(filePath)),
					Summary:  "Extraction failed — stored raw text only",
					FullText: pageText,
				}
			} else {
				analysis.FullText = pageText
			}
		}
		// For PDFs, process pages for accurate OCR
		if ext == ".pdf" {
			pageCount := GetPDFPageCount(filePath)
			if pageCount > 1 {
				log.Printf(" Multi-page PDF detected (%d pages)", pageCount)
				UpdateJob(hash, "ocr", fmt.Sprintf("Multi-page PDF: %d pages", pageCount))
				fullText, err := ProcessPDFPageByPage(filePath, hash)
				if err == nil && fullText != "" {
					analysis.FullText = fullText
				}
			} else if analysis.FullText == "" || len(analysis.FullText) < 50 || strings.HasPrefix(analysis.FullText, "[") {
				// Single-page but full_text is empty/placeholder — retry with AnalyzePageOnly
				log.Printf(" Single-page PDF with bad full_text (%q) — retrying with AnalyzePageOnly...", analysis.FullText)
				UpdateJob(hash, "ocr", "Retrying text extraction...")
				if pageText, err := AnalyzePageOnly(imageData, 1); err == nil && pageText != "" {
					analysis.FullText = pageText
				} else if err != nil {
					log.Printf(" AnalyzePageOnly fallback failed: %v", err)
				}
			}
		}
	}
	log.Printf(" Category: %s, Type: %s", analysis.Category, analysis.DocType)
	// Copy to store. The store is keyed by content hash, so if the file is
	// already there (e.g. reprocessing a document whose inbox copy was
	// cleaned up after async OCR), the copy is a no-op — skip it rather than
	// fail on a missing source file.
	storePath := filepath.Join(storeDir, hash+ext)
	if _, statErr := os.Stat(storePath); os.IsNotExist(statErr) {
		if err := copyFile(filePath, storePath); err != nil {
			return nil, fmt.Errorf("store copy failed: %w", err)
		}
	}
	// Create document record
	// Use title if provided, fall back to summary
	title := analysis.Title
	if title == "" {
		title = analysis.Summary
	}
	doc := &Document{
		ID:           hash,
		Title:        title,
		Category:     analysis.Category,
		Type:         analysis.DocType,
		Date:         analysis.Date,
		Amount:       analysis.AmountString(),
		Vendor:       analysis.Vendor,
		Summary:      analysis.Summary,
		FullText:     analysis.FullText,
		PDFPath:      storePath,
		OriginalFile: filepath.Base(filePath),
		ProcessedAt:  time.Now().Format(time.RFC3339),
		Status:       "ready",
	}
	// Save to database
	if err := InsertDocument(doc); err != nil {
		return nil, fmt.Errorf("db insert failed: %w", err)
	}
	// Generate embedding (best-effort: failure is logged, not fatal)
	if analysis.FullText != "" {
		UpdateJob(hash, "embedding", "Generating search index...")
		log.Printf(" Generating embedding...")
		if emb, err := GenerateEmbedding(analysis.FullText); err == nil {
			log.Printf(" Embedding: %d dimensions", len(emb))
			StoreEmbedding(hash, emb)
		} else {
			log.Printf(" Embedding failed: %v", err)
		}
	}
	// Remove from inbox
	os.Remove(filePath)
	log.Printf(" ✓ Done: %s/%s", analysis.Category, hash)
	return doc, nil
}
// copyFile copies src to dst, creating or truncating dst.
// The destination's Close error is checked explicitly: on writes, buffered
// data may only be flushed at close time, so a deferred (ignored) Close
// could silently lose data.
func copyFile(src, dst string) error {
	in, err := os.Open(src)
	if err != nil {
		return err
	}
	defer in.Close()
	out, err := os.Create(dst)
	if err != nil {
		return err
	}
	if _, err := io.Copy(out, in); err != nil {
		out.Close()
		return err
	}
	return out.Close()
}
// ExtractOfficeText extracts plain text from Office documents natively.
// OOXML (DOCX/XLSX/PPTX) and OpenDocument (ODT/ODS/ODP) files are ZIP
// archives containing XML — no LibreOffice needed.
//
// Returns an error if the file is not a readable ZIP, the extension is
// unsupported, or no text could be extracted.
func ExtractOfficeText(filePath string) (string, error) {
	ext := strings.ToLower(filepath.Ext(filePath))
	r, err := zip.OpenReader(filePath)
	if err != nil {
		return "", fmt.Errorf("not a valid Office file: %w", err)
	}
	defer r.Close()
	// Which XML paths to extract per format
	var targets []string
	switch ext {
	case ".docx", ".doc", ".rtf":
		targets = []string{"word/document.xml", "word/body.xml"}
	case ".odt", ".ods", ".odp":
		// OpenDocument formats keep all document content in content.xml.
		// (Previously .odt wrongly looked for word/document.xml and
		// .ods/.odp hit the default error, even though IsOfficeFile
		// accepts all three.)
		targets = []string{"content.xml"}
	case ".xlsx", ".xls":
		// All sheets
		for _, f := range r.File {
			if strings.HasPrefix(f.Name, "xl/worksheets/sheet") && strings.HasSuffix(f.Name, ".xml") {
				targets = append(targets, f.Name)
			}
		}
		// Shared strings (cell values are stored here for xlsx)
		targets = append(targets, "xl/sharedStrings.xml")
	case ".pptx", ".ppt":
		for _, f := range r.File {
			if strings.HasPrefix(f.Name, "ppt/slides/slide") && strings.HasSuffix(f.Name, ".xml") {
				targets = append(targets, f.Name)
			}
		}
	default:
		return "", fmt.Errorf("unsupported office format: %s", ext)
	}
	// Set lookup instead of a nested loop over targets per archive entry.
	want := make(map[string]bool, len(targets))
	for _, t := range targets {
		want[t] = true
	}
	var sb strings.Builder
	for _, f := range r.File {
		if !want[f.Name] {
			continue
		}
		rc, err := f.Open()
		if err != nil {
			continue // best-effort: skip unreadable entries
		}
		data, err := io.ReadAll(rc)
		rc.Close()
		if err != nil {
			continue
		}
		sb.WriteString(xmlToText(data))
		sb.WriteString("\n")
	}
	text := strings.TrimSpace(sb.String())
	if text == "" {
		return "", fmt.Errorf("no text extracted from %s", filepath.Base(filePath))
	}
	return text, nil
}
// xmlToText strips XML tags and decodes common entities, returning plain
// text with runs of whitespace collapsed to single spaces.
func xmlToText(data []byte) string {
	var sb strings.Builder
	inTag := false
	lastWasSpace := false
	for i := 0; i < len(data); i++ {
		c := data[i]
		switch {
		case c == '<':
			inTag = true
			// Emit space between elements so words don't run together
			if !lastWasSpace {
				sb.WriteByte(' ')
				lastWasSpace = true
			}
		case c == '>':
			inTag = false
		case !inTag:
			if c == ' ' || c == '\t' || c == '\n' || c == '\r' {
				if !lastWasSpace {
					sb.WriteByte(' ')
					lastWasSpace = true
				}
			} else {
				sb.WriteByte(c)
				lastWasSpace = false
			}
		}
	}
	// Decode common XML entities. &amp; MUST be decoded last: decoding it
	// first double-decodes escaped entities (e.g. "&amp;lt;" would become
	// "<" instead of the correct "&lt;").
	rep := strings.NewReplacer(
		"&lt;", "<",
		"&gt;", ">",
		"&quot;", "\"",
		"&apos;", "'",
		"&#xA;", "\n",
		"&#x9;", "\t",
	)
	result := rep.Replace(sb.String())
	result = strings.ReplaceAll(result, "&amp;", "&")
	return strings.TrimSpace(result)
}
// IsOfficeFile reports whether ext (lowercase, with leading dot) is an
// Office format with extractable XML text. Keep the accepted extensions in
// sync with the switch in ExtractOfficeText.
func IsOfficeFile(ext string) bool {
	// A switch avoids allocating a map literal on every call.
	switch ext {
	case ".docx", ".xlsx", ".pptx", ".odt", ".ods", ".odp":
		return true
	}
	return false
}

77
db.go
View File

@ -337,7 +337,7 @@ func SearchDocuments(query string, limit int) ([]Document, error) {
FROM documents d
JOIN documents_fts fts ON d.id = fts.id
WHERE documents_fts MATCH ?
ORDER BY rank
ORDER BY d.processed_at DESC
LIMIT ?
`, query, limit)
@ -715,3 +715,78 @@ func scanDocumentRows(rows *sql.Rows) ([]Document, error) {
}
return docs, rows.Err()
}
// SearchDocumentsWithFullText is like SearchDocuments but includes the full_text
// column. Used by the format=md endpoint where OCR content is needed.
func SearchDocumentsWithFullText(query string, limit int) ([]Document, error) {
if limit <= 0 {
limit = 200
}
rows, err := db.Query(`
SELECT d.id, COALESCE(d.title,''), COALESCE(d.category,''), COALESCE(d.type,''),
COALESCE(d.date,''), COALESCE(d.amount,''), COALESCE(d.vendor,''),
COALESCE(d.summary,''), COALESCE(d.full_text,''), COALESCE(d.pdf_path,''),
COALESCE(d.processed_at,''), COALESCE(d.original_file,''), COALESCE(d.status,'ready')
FROM documents d
JOIN documents_fts fts ON d.id = fts.id
WHERE documents_fts MATCH ?
ORDER BY d.processed_at DESC
LIMIT ?
`, query, limit)
if err != nil {
return nil, err
}
defer rows.Close()
var docs []Document
for rows.Next() {
var doc Document
if err := rows.Scan(
&doc.ID, &doc.Title, &doc.Category, &doc.Type, &doc.Date,
&doc.Amount, &doc.Vendor, &doc.Summary, &doc.FullText, &doc.PDFPath,
&doc.ProcessedAt, &doc.OriginalFile, &doc.Status,
); err != nil {
continue
}
docs = append(docs, doc)
}
return docs, rows.Err()
}
// SearchDocumentsWithFullTextFallback is the LIKE-based fallback that also includes full_text.
func SearchDocumentsWithFullTextFallback(query string, limit int) ([]Document, error) {
if limit <= 0 {
limit = 200
}
pattern := "%" + query + "%"
rows, err := db.Query(`
SELECT id, COALESCE(title,''), COALESCE(category,''), COALESCE(type,''),
COALESCE(date,''), COALESCE(amount,''), COALESCE(vendor,''),
COALESCE(summary,''), COALESCE(full_text,''), COALESCE(pdf_path,''),
COALESCE(processed_at,''), COALESCE(original_file,''), COALESCE(status,'ready')
FROM documents
WHERE title LIKE ? OR summary LIKE ? OR vendor LIKE ? OR full_text LIKE ?
ORDER BY processed_at DESC
LIMIT ?
`, pattern, pattern, pattern, pattern, limit)
if err != nil {
return nil, err
}
defer rows.Close()
var docs []Document
for rows.Next() {
var doc Document
if err := rows.Scan(
&doc.ID, &doc.Title, &doc.Category, &doc.Type, &doc.Date,
&doc.Amount, &doc.Vendor, &doc.Summary, &doc.FullText, &doc.PDFPath,
&doc.ProcessedAt, &doc.OriginalFile, &doc.Status,
); err != nil {
continue
}
docs = append(docs, doc)
}
return docs, rows.Err()
}

BIN
docsys.bak-20260225-134659 Executable file

Binary file not shown.

175
main.go
View File

@ -72,6 +72,10 @@ func main() {
"title": strings.Title,
"safe": func(s string) template.HTML { return template.HTML(s) },
"multiply": func(a float64, b float64) float64 { return a * b },
"isImage": func(filename string) bool {
ext := strings.ToLower(filepath.Ext(filename))
return ext == ".jpg" || ext == ".jpeg" || ext == ".png" || ext == ".gif" || ext == ".webp" || ext == ".tiff"
},
}
r := chi.NewRouter()
@ -84,6 +88,7 @@ func main() {
// PDF serving
r.Get("/pdf/{hash}", servePDF)
r.Get("/img/{hash}", serveImage)
// Pages
r.Get("/", dashboardHandler)
@ -93,6 +98,7 @@ func main() {
r.Get("/search", searchHandler)
// API endpoints
r.Get("/api/search", apiSearchMDHandler)
r.Post("/api/search", apiSearchHandler)
r.Get("/api/documents", apiDocumentsHandler)
r.Get("/api/processing", apiProcessingHandler)
@ -151,6 +157,8 @@ func categoryIcon(cat string) string {
func formatDate(s string) string {
formats := []string{
"2006-01-02T15:04:05-07:00",
"2006-01-02T15:04:05.999999-07:00",
"2006-01-02T15:04:05.999999",
"2006-01-02T15:04:05",
"2006-01-02",
@ -323,6 +331,26 @@ func servePDF(w http.ResponseWriter, r *http.Request) {
http.Error(w, "File not found", http.StatusNotFound)
}
func serveImage(w http.ResponseWriter, r *http.Request) {
hash := chi.URLParam(r, "hash")
mimeTypes := map[string]string{
".jpg": "image/jpeg", ".jpeg": "image/jpeg",
".png": "image/png", ".gif": "image/gif",
".webp": "image/webp", ".tiff": "image/tiff",
}
for ext, mime := range mimeTypes {
path := filepath.Join(storeDir, hash+ext)
if _, err := os.Stat(path); err == nil {
w.Header().Set("Content-Type", mime)
w.Header().Set("Cache-Control", "private, max-age=3600")
http.ServeFile(w, r, path)
return
}
}
http.NotFound(w, r)
}
// sanitizeFilename removes characters unsafe for use in Content-Disposition filenames.
func sanitizeFilename(name string) string {
replacer := strings.NewReplacer(`"`, "'", "/", "-", "\\", "-", "\n", " ", "\r", "")
@ -347,6 +375,77 @@ func apiSearchHandler(w http.ResponseWriter, r *http.Request) {
renderPartial(w, "document-list", docs)
}
// apiSearchMDHandler handles GET /api/search?q={query}&format=md
// Returns all matching documents as concatenated plain-text markdown,
// one document per section separated by ---. Intended for AI/LLM consumption.
func apiSearchMDHandler(w http.ResponseWriter, r *http.Request) {
query := r.URL.Query().Get("q")
format := r.URL.Query().Get("format")
// Only serve markdown format; anything else falls back to a 400
if format != "md" {
http.Error(w, "format=md required", http.StatusBadRequest)
return
}
if query == "" {
http.Error(w, "q parameter required", http.StatusBadRequest)
return
}
docs, err := SearchDocumentsWithFullText(query, 200)
if err != nil {
docs, _ = SearchDocumentsWithFullTextFallback(query, 200)
}
w.Header().Set("Content-Type", "text/markdown; charset=utf-8")
if len(docs) == 0 {
w.Write([]byte("No documents found matching: " + query + "\n"))
return
}
var sb strings.Builder
for i, doc := range docs {
sb.WriteString("# Document: ")
if doc.Title != "" {
sb.WriteString(doc.Title)
} else {
sb.WriteString("(untitled)")
}
sb.WriteString("\n")
sb.WriteString("ID: ")
sb.WriteString(doc.ID)
if doc.Category != "" {
sb.WriteString(" | Category: ")
sb.WriteString(doc.Category)
}
if doc.Date != "" {
sb.WriteString(" | Date: ")
sb.WriteString(doc.Date)
}
if doc.Vendor != "" {
sb.WriteString(" | Vendor: ")
sb.WriteString(doc.Vendor)
}
sb.WriteString("\n\n")
text := doc.FullText
if text == "" {
text = doc.Summary
}
if text != "" {
sb.WriteString(text)
sb.WriteString("\n")
}
if i < len(docs)-1 {
sb.WriteString("\n---\n\n")
}
}
w.Write([]byte(sb.String()))
}
func apiProcessingHandler(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(GetActiveJobs())
@ -444,6 +543,7 @@ func ingestHandler(w http.ResponseWriter, r *http.Request) {
var req struct {
Filename string `json:"filename"`
Content string `json:"content"`
URL string `json:"url"`
Source string `json:"source"`
Subject string `json:"subject"`
From string `json:"from"`
@ -454,16 +554,73 @@ func ingestHandler(w http.ResponseWriter, r *http.Request) {
return
}
if req.Filename == "" || req.Content == "" {
http.Error(w, "filename and content are required", http.StatusBadRequest)
return
}
var data []byte
// Decode base64 content
data, err := base64.StdEncoding.DecodeString(req.Content)
if err != nil {
http.Error(w, "Invalid base64 content", http.StatusBadRequest)
return
if req.URL != "" {
// Fetch from URL
resp, err := http.Get(req.URL)
if err != nil {
http.Error(w, "Failed to fetch URL: "+err.Error(), http.StatusBadGateway)
return
}
defer resp.Body.Close()
if resp.StatusCode >= 400 {
http.Error(w, fmt.Sprintf("URL returned %d", resp.StatusCode), http.StatusBadGateway)
return
}
data, err = io.ReadAll(resp.Body)
if err != nil {
http.Error(w, "Failed to read URL content", http.StatusInternalServerError)
return
}
// Derive filename from URL or content-type if not provided
if req.Filename == "" {
ext := ""
ct := resp.Header.Get("Content-Type")
switch {
case strings.Contains(ct, "jpeg") || strings.Contains(ct, "jpg"):
ext = ".jpg"
case strings.Contains(ct, "png"):
ext = ".png"
case strings.Contains(ct, "gif"):
ext = ".gif"
case strings.Contains(ct, "webp"):
ext = ".webp"
case strings.Contains(ct, "pdf"):
ext = ".pdf"
case strings.Contains(ct, "tiff"):
ext = ".tiff"
default:
// Try to get extension from URL path
urlPath := resp.Request.URL.Path
if e := filepath.Ext(urlPath); e != "" {
ext = e
} else {
ext = ".bin"
}
}
// Use last path segment of URL as base name
base := filepath.Base(resp.Request.URL.Path)
if base == "." || base == "/" {
base = "url-import"
}
if filepath.Ext(base) == "" {
base += ext
}
req.Filename = base
}
} else {
if req.Filename == "" || req.Content == "" {
http.Error(w, "filename and content required, or provide url", http.StatusBadRequest)
return
}
// Decode base64 content
var err error
data, err = base64.StdEncoding.DecodeString(req.Content)
if err != nil {
http.Error(w, "Invalid base64 content", http.StatusBadRequest)
return
}
}
// Sanitize filename

View File

@ -120,7 +120,7 @@
{{if .Stats.RecentUploads}}
<div class="divide-y divide-gray-100 dark:divide-gray-700">
{{range .Stats.RecentUploads}}
<a href="/document/{{.ID}}" class="flex items-center px-5 py-4 hover:bg-gray-50 dark:hover:bg-gray-700/50 transition-colors group">
<div class="flex items-center px-5 py-4 hover:bg-gray-50 dark:hover:bg-gray-700/50 transition-colors group cursor-pointer" onclick="location.href='/document/{{.ID}}'">
<div class="flex-shrink-0 p-2 bg-gray-100 dark:bg-gray-700 rounded-lg mr-4">
<svg class="w-6 h-6 text-gray-500 dark:text-gray-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 12h6m-6 4h6m2 5H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z"></path>
@ -130,27 +130,34 @@
<p class="font-medium text-gray-900 dark:text-white truncate group-hover:text-brand-600 dark:group-hover:text-brand-400 transition-colors">{{.Title}}</p>
<p class="text-sm text-gray-500 dark:text-gray-400 truncate">{{truncate .Summary 100}}</p>
</div>
<div class="flex-shrink-0 ml-4 text-right">
{{if eq .Status "processing"}}
<span class="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-amber-100 dark:bg-amber-900/30 text-amber-700 dark:text-amber-300" data-processing="{{.ID}}">
<svg class="animate-spin -ml-0.5 mr-1.5 h-3 w-3" fill="none" viewBox="0 0 24 24">
<circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle>
<path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
<div class="flex-shrink-0 ml-4 flex items-center gap-3">
<div class="text-right">
{{if eq .Status "processing"}}
<span class="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-amber-100 dark:bg-amber-900/30 text-amber-700 dark:text-amber-300" data-processing="{{.ID}}">
<svg class="animate-spin -ml-0.5 mr-1.5 h-3 w-3" fill="none" viewBox="0 0 24 24">
<circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle>
<path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
</svg>
Processing
</span>
{{else if eq .Status "error"}}
<span class="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-red-100 dark:bg-red-900/30 text-red-700 dark:text-red-300">
Error
</span>
{{else}}
<span class="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-brand-100 dark:bg-brand-900/30 text-brand-700 dark:text-brand-300">
{{title .Category}}
</span>
{{end}}
<p class="text-xs text-gray-400 dark:text-gray-500 mt-1">{{formatDateTime .ProcessedAt}}</p>
</div>
<button onclick="event.stopPropagation(); deleteDoc('{{.ID}}', '{{.Title}}')" class="opacity-0 group-hover:opacity-100 p-1.5 text-gray-400 hover:text-red-600 hover:bg-red-50 dark:hover:bg-red-900/20 rounded-lg transition-all" title="Delete">
<svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16"></path>
</svg>
Processing
</span>
{{else if eq .Status "error"}}
<span class="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-red-100 dark:bg-red-900/30 text-red-700 dark:text-red-300">
Error
</span>
{{else}}
<span class="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-brand-100 dark:bg-brand-900/30 text-brand-700 dark:text-brand-300">
{{title .Category}}
</span>
{{end}}
<p class="text-xs text-gray-400 dark:text-gray-500 mt-1">{{formatDateTime .ProcessedAt}}</p>
</button>
</div>
</a>
</div>
{{end}}
</div>
{{else}}
@ -292,6 +299,51 @@
}
});
// Clipboard paste — handles Ctrl+V of images/screenshots
document.addEventListener('paste', async (e) => {
const items = Array.from(e.clipboardData.items).filter(i => i.type.startsWith('image/'));
if (!items.length) return;
e.preventDefault();
progress.classList.remove('hidden');
const pendingIds = [];
for (const item of items) {
const file = item.getAsFile();
if (!file) continue;
const ext = item.type.split('/')[1] || 'png';
const name = `paste-${Date.now()}.${ext}`;
try {
const formData = new FormData();
formData.append('file', new File([file], name, { type: item.type }));
const res = await fetch('/api/upload', { method: 'POST', body: formData });
const data = await res.json();
if (data.status === 'success') {
showToast('✓ Pasted image', 'success');
pendingIds.push(data.id);
} else {
showToast('Paste failed', 'error');
}
} catch (err) {
showToast('Paste failed: ' + err.message, 'error');
}
}
if (pendingIds.length > 0) {
sessionStorage.setItem('pendingDocs', JSON.stringify(pendingIds));
window.location.reload();
} else {
progress.classList.add('hidden');
}
});
async function deleteDoc(id, title) {
if (!confirm('Delete "' + title + '"?')) return;
const res = await fetch('/api/document/' + id, { method: 'DELETE' });
if (res.ok) {
window.location.reload();
} else {
alert('Failed to delete document');
}
}
// Check for pending docs on page load (from web upload)
const pendingDocsJson = sessionStorage.getItem('pendingDocs');
if (pendingDocsJson) {

View File

@ -26,6 +26,12 @@
</div>
</div>
<div class="flex gap-2">
<button onclick="deleteDocument()" class="inline-flex items-center px-3 py-2 bg-white dark:bg-gray-800 text-red-600 dark:text-red-400 text-sm font-medium rounded-lg border border-red-200 dark:border-red-800 hover:bg-red-50 dark:hover:bg-red-900/20 transition-all">
<svg class="w-4 h-4 mr-1.5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16"></path>
</svg>
Delete
</button>
<button onclick="toggleEdit()" class="inline-flex items-center px-3 py-2 bg-white dark:bg-gray-800 text-gray-700 dark:text-gray-200 text-sm font-medium rounded-lg border border-gray-300 dark:border-gray-600 hover:bg-gray-50 dark:hover:bg-gray-700 transition-all">
<svg class="w-4 h-4 mr-1.5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M11 5H6a2 2 0 00-2 2v11a2 2 0 002 2h11a2 2 0 002-2v-5m-1.414-9.414a2 2 0 112.828 2.828L11.828 15H9v-2.828l8.586-8.586z"></path>
@ -163,7 +169,16 @@
{{end}}
<div>
{{if .Document.PDFPath}}
{{if isImage .Document.OriginalFile}}
<div class="bg-white dark:bg-gray-800 rounded-2xl shadow-sm border border-gray-100 dark:border-gray-700 overflow-hidden">
<div class="px-6 py-4 border-b border-gray-100 dark:border-gray-700">
<h2 class="font-semibold text-gray-900 dark:text-white">Preview</h2>
</div>
<div class="p-4 flex justify-center">
<img src="/img/{{.Document.ID}}" alt="{{.Document.Title}}" class="max-w-full rounded-lg shadow-sm" style="max-height:600px;object-fit:contain;">
</div>
</div>
{{else if .Document.PDFPath}}
<div class="bg-white dark:bg-gray-800 rounded-2xl shadow-sm border border-gray-100 dark:border-gray-700 overflow-hidden">
<div class="px-6 py-4 border-b border-gray-100 dark:border-gray-700 flex items-center justify-between">
<h2 class="font-semibold text-gray-900 dark:text-white">Document Preview</h2>

View File

@ -8,7 +8,10 @@
<p class="font-medium text-gray-900 dark:text-white truncate text-sm">{{.Title}}</p>
<p class="text-xs text-gray-500 dark:text-gray-400 truncate">{{truncate .Summary 50}}</p>
</div>
<span class="ml-2 text-xs text-gray-400">{{title .Category}}</span>
<div class="ml-2 text-right flex-shrink-0">
<span class="text-xs text-gray-400 block">{{title .Category}}</span>
{{if .ProcessedAt}}<span class="text-xs text-gray-400 block">{{formatDate .ProcessedAt}}</span>{{end}}
</div>
</a>
{{end}}
</div>

View File

@ -56,7 +56,9 @@
{{title .Type}}
</span>
{{end}}
{{if .Date}}
{{if .ProcessedAt}}
<span class="text-xs text-gray-500 dark:text-gray-400">📅 {{formatDateTime .ProcessedAt}}</span>
{{else if .Date}}
<span class="text-xs text-gray-500 dark:text-gray-400">{{formatDate .Date}}</span>
{{end}}
{{if .Score}}