chore: auto-commit uncommitted changes
This commit is contained in:
parent
2c91d5649e
commit
63d4e5e5ca
181
ai.go
181
ai.go
|
|
@ -90,7 +90,7 @@ func ConvertToImage(filePath string) ([]byte, error) {
|
||||||
}
|
}
|
||||||
defer os.RemoveAll(tmpDir)
|
defer os.RemoveAll(tmpDir)
|
||||||
|
|
||||||
cmd := exec.Command("libreoffice", "--headless", "--convert-to", "pdf", "--outdir", tmpDir, filePath)
|
cmd := exec.Command("soffice", "--headless", "--convert-to", "pdf", "--outdir", tmpDir, filePath)
|
||||||
if err := cmd.Run(); err != nil {
|
if err := cmd.Run(); err != nil {
|
||||||
return nil, fmt.Errorf("libreoffice conversion failed: %w", err)
|
return nil, fmt.Errorf("libreoffice conversion failed: %w", err)
|
||||||
}
|
}
|
||||||
|
|
@ -124,6 +124,7 @@ func ConvertToImage(filePath string) ([]byte, error) {
|
||||||
return os.ReadFile(filePath)
|
return os.ReadFile(filePath)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// IsTextFile returns true for plain text files
|
// IsTextFile returns true for plain text files
|
||||||
func IsTextFile(ext string) bool {
|
func IsTextFile(ext string) bool {
|
||||||
textExts := map[string]bool{
|
textExts := map[string]bool{
|
||||||
|
|
@ -160,7 +161,7 @@ func AnalyzeWithVision(imageData []byte) (*DocumentAnalysis, error) {
|
||||||
- vendor: Company/organization name
|
- vendor: Company/organization name
|
||||||
- amount: Dollar/currency amount if present (e.g., "$123.45" or "809,400 BYN")
|
- amount: Dollar/currency amount if present (e.g., "$123.45" or "809,400 BYN")
|
||||||
|
|
||||||
5. **Title**: Specific English title (max 8 words) that identifies THIS document uniquely. Include the key distinguishing details: who sent it, what it's about, and when. Bad: "Financial Report" or "Invoice". Good: "N-able Technology Exchange Rate Loss Explanation Feb 2025" or "Duke Energy Electric Bill Oct 2024" or "IRS Form 1099-INT Chase Bank 2024" or "BayCare HomeCare Invoice $340 Nov 2025". Never use generic words like "Document", "Letter", "Report" alone — always qualify them.
|
5. **Title**: Specific English title (max 8 words) that identifies THIS document uniquely. Ask yourself: "If I had to find this document again, what would I search for?" Prioritize the most unique/recoverable detail: an account number, confirmation code, ID, or specific value that can't be inferred from context. If the document's main value is a number or ID, put it in the title. Bad: "FedEx Receipt Jan 2026" (generic). Good: "FedEx Account 203634010" or "Duke Energy Electric Bill Oct 2024" or "IRS Form 1099-INT Chase Bank 2024". Never use generic words like "Document", "Letter", "Report" alone — always qualify them.
|
||||||
|
|
||||||
6. **Summary**: 1-2 sentence English description with key details.
|
6. **Summary**: 1-2 sentence English description with key details.
|
||||||
|
|
||||||
|
|
@ -234,12 +235,20 @@ func AnalyzeText(text, filename string) (*DocumentAnalysis, error) {
|
||||||
|
|
||||||
Categorize into ONE of: taxes, bills, medical, insurance, legal, financial, expenses, vehicles, home, personal, contacts, uncategorized
|
Categorize into ONE of: taxes, bills, medical, insurance, legal, financial, expenses, vehicles, home, personal, contacts, uncategorized
|
||||||
|
|
||||||
|
For the title: specific and concise (max 8 words). Ask yourself: "What would I search for to find this again?" Prioritize unique/recoverable details — account numbers, IDs, confirmation codes — over generic document type names. Examples:
|
||||||
|
- "FedEx Account 203634010" (if the key value is an account number)
|
||||||
|
- "Allegiant Confirmation IB2EJA AVL-PIE Feb 2026"
|
||||||
|
- "IRS Form 1099-INT Chase Bank 2024"
|
||||||
|
Bad: "FedEx Receipt Jan 2026" or "Flight Confirmation" — too generic.
|
||||||
|
|
||||||
|
Also produce a "full_text" field: reformat the raw content as clean Markdown. Use ## headers for sections, | tables | for tabular data, **bold** for field labels, and bullet lists where appropriate. Preserve all values exactly. Do not summarize — include everything.
|
||||||
|
|
||||||
Respond in JSON ONLY:
|
Respond in JSON ONLY:
|
||||||
{"category": "...", "doc_type": "...", "date": "...", "vendor": "...", "amount": "...", "summary": "..."}`, filename, text)
|
{"category": "...", "doc_type": "...", "date": "...", "vendor": "...", "amount": "...", "title": "...", "summary": "...", "full_text": "..."}`, filename, text)
|
||||||
|
|
||||||
reqBody := map[string]interface{}{
|
reqBody := map[string]interface{}{
|
||||||
"model": "accounts/fireworks/models/kimi-k2-instruct-0905",
|
"model": "accounts/fireworks/models/kimi-k2-instruct-0905",
|
||||||
"max_tokens": 1024,
|
"max_tokens": 2048,
|
||||||
"messages": []map[string]interface{}{
|
"messages": []map[string]interface{}{
|
||||||
{"role": "user", "content": prompt},
|
{"role": "user", "content": prompt},
|
||||||
},
|
},
|
||||||
|
|
@ -249,7 +258,10 @@ Respond in JSON ONLY:
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
analysis.FullText = text
|
// If model didn't produce full_text, fall back to raw extracted text
|
||||||
|
if analysis.FullText == "" {
|
||||||
|
analysis.FullText = text
|
||||||
|
}
|
||||||
return analysis, nil
|
return analysis, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -705,6 +717,12 @@ func ProcessDocument(filePath string) (*Document, error) {
|
||||||
return nil, fmt.Errorf("store copy failed: %w", err)
|
return nil, fmt.Errorf("store copy failed: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Only set PDFPath for actual PDFs — office/text files have no previewable PDF
|
||||||
|
pdfStorePath := ""
|
||||||
|
if ext == ".pdf" {
|
||||||
|
pdfStorePath = storePath
|
||||||
|
}
|
||||||
|
|
||||||
// Create document record
|
// Create document record
|
||||||
// Use title if provided, fall back to summary
|
// Use title if provided, fall back to summary
|
||||||
title := analysis.Title
|
title := analysis.Title
|
||||||
|
|
@ -722,7 +740,7 @@ func ProcessDocument(filePath string) (*Document, error) {
|
||||||
Vendor: analysis.Vendor,
|
Vendor: analysis.Vendor,
|
||||||
Summary: analysis.Summary,
|
Summary: analysis.Summary,
|
||||||
FullText: analysis.FullText,
|
FullText: analysis.FullText,
|
||||||
PDFPath: storePath,
|
PDFPath: pdfStorePath,
|
||||||
OriginalFile: filepath.Base(filePath),
|
OriginalFile: filepath.Base(filePath),
|
||||||
ProcessedAt: time.Now().Format(time.RFC3339),
|
ProcessedAt: time.Now().Format(time.RFC3339),
|
||||||
Status: "ready",
|
Status: "ready",
|
||||||
|
|
@ -838,48 +856,135 @@ func ExtractOfficeText(filePath string) (string, error) {
|
||||||
return text, nil
|
return text, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// xmlToText strips XML tags and decodes entities, returning plain text.
|
// xmlToText parses DOCX/Office XML into structured plain text.
|
||||||
|
// Understands paragraph breaks, table rows/cells, and text runs.
|
||||||
func xmlToText(data []byte) string {
|
func xmlToText(data []byte) string {
|
||||||
|
s := string(data)
|
||||||
|
|
||||||
|
// Normalise tag names: strip namespace prefix (w:p → p, a:p → p, etc.)
|
||||||
|
// We work on the raw string using simple tag scanning.
|
||||||
var sb strings.Builder
|
var sb strings.Builder
|
||||||
inTag := false
|
inTag := false
|
||||||
lastWasSpace := false
|
var tagBuf strings.Builder
|
||||||
|
lastWasNewline := false
|
||||||
|
|
||||||
for i := 0; i < len(data); i++ {
|
flushLine := func() {
|
||||||
c := data[i]
|
line := strings.TrimSpace(sb.String())
|
||||||
switch {
|
if line != "" {
|
||||||
case c == '<':
|
sb.Reset()
|
||||||
|
// return the line — caller collects via out builder
|
||||||
|
// (we reuse sb for the whole doc; collect separately)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ = flushLine
|
||||||
|
|
||||||
|
// Two-pass: build a token stream of (tag, text) pairs
|
||||||
|
type token struct {
|
||||||
|
tag string // lowercased local name, or "" for text
|
||||||
|
text string
|
||||||
|
}
|
||||||
|
var tokens []token
|
||||||
|
|
||||||
|
for i := 0; i < len(s); i++ {
|
||||||
|
c := s[i]
|
||||||
|
if c == '<' {
|
||||||
|
// flush text buffer
|
||||||
|
if sb.Len() > 0 {
|
||||||
|
tokens = append(tokens, token{text: sb.String()})
|
||||||
|
sb.Reset()
|
||||||
|
}
|
||||||
inTag = true
|
inTag = true
|
||||||
// Emit space between elements so words don't run together
|
tagBuf.Reset()
|
||||||
if !lastWasSpace {
|
continue
|
||||||
sb.WriteByte(' ')
|
}
|
||||||
lastWasSpace = true
|
if c == '>' {
|
||||||
}
|
|
||||||
case c == '>':
|
|
||||||
inTag = false
|
inTag = false
|
||||||
case !inTag:
|
raw := strings.TrimSpace(tagBuf.String())
|
||||||
if c == ' ' || c == '\t' || c == '\n' || c == '\r' {
|
// Extract local name (strip namespace and attributes)
|
||||||
if !lastWasSpace {
|
name := raw
|
||||||
sb.WriteByte(' ')
|
if idx := strings.IndexAny(raw, " \t\n\r/"); idx >= 0 {
|
||||||
lastWasSpace = true
|
name = raw[:idx]
|
||||||
}
|
|
||||||
} else {
|
|
||||||
sb.WriteByte(c)
|
|
||||||
lastWasSpace = false
|
|
||||||
}
|
}
|
||||||
|
if idx := strings.Index(name, ":"); idx >= 0 {
|
||||||
|
name = name[idx+1:]
|
||||||
|
}
|
||||||
|
name = strings.ToLower(name)
|
||||||
|
closing := strings.HasPrefix(raw, "/")
|
||||||
|
if closing {
|
||||||
|
name = strings.TrimPrefix(name, "/")
|
||||||
|
}
|
||||||
|
tokens = append(tokens, token{tag: name})
|
||||||
|
tagBuf.Reset()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if inTag {
|
||||||
|
tagBuf.WriteByte(c)
|
||||||
|
} else {
|
||||||
|
sb.WriteByte(c)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if sb.Len() > 0 {
|
||||||
|
tokens = append(tokens, token{text: sb.String()})
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now render tokens into structured text
|
||||||
|
var out strings.Builder
|
||||||
|
pendingNewlines := 0
|
||||||
|
|
||||||
|
emitNewlines := func(n int) {
|
||||||
|
if n > pendingNewlines {
|
||||||
|
pendingNewlines = n
|
||||||
|
}
|
||||||
|
}
|
||||||
|
flushNewlines := func() {
|
||||||
|
for i := 0; i < pendingNewlines; i++ {
|
||||||
|
out.WriteByte('\n')
|
||||||
|
}
|
||||||
|
pendingNewlines = 0
|
||||||
|
lastWasNewline = pendingNewlines == 0
|
||||||
|
}
|
||||||
|
_ = lastWasNewline
|
||||||
|
|
||||||
|
inCell := false
|
||||||
|
cellCount := 0
|
||||||
|
|
||||||
|
for _, tok := range tokens {
|
||||||
|
if tok.tag == "" {
|
||||||
|
// text node
|
||||||
|
txt := tok.text
|
||||||
|
// decode entities
|
||||||
|
txt = strings.ReplaceAll(txt, "&", "&")
|
||||||
|
txt = strings.ReplaceAll(txt, "<", "<")
|
||||||
|
txt = strings.ReplaceAll(txt, ">", ">")
|
||||||
|
txt = strings.ReplaceAll(txt, """, "\"")
|
||||||
|
txt = strings.ReplaceAll(txt, "'", "'")
|
||||||
|
txt = strings.ReplaceAll(txt, "
", "\n")
|
||||||
|
txt = strings.ReplaceAll(txt, "	", "\t")
|
||||||
|
if strings.TrimSpace(txt) != "" {
|
||||||
|
flushNewlines()
|
||||||
|
out.WriteString(txt)
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
switch tok.tag {
|
||||||
|
case "p": // paragraph
|
||||||
|
emitNewlines(1)
|
||||||
|
case "tr": // table row start
|
||||||
|
inCell = false
|
||||||
|
cellCount = 0
|
||||||
|
emitNewlines(1)
|
||||||
|
case "tc": // table cell
|
||||||
|
if inCell {
|
||||||
|
out.WriteString("\t")
|
||||||
|
}
|
||||||
|
inCell = true
|
||||||
|
cellCount++
|
||||||
|
case "br": // line break
|
||||||
|
emitNewlines(1)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Decode common XML entities
|
return strings.TrimSpace(out.String())
|
||||||
result := sb.String()
|
|
||||||
result = strings.ReplaceAll(result, "&", "&")
|
|
||||||
result = strings.ReplaceAll(result, "<", "<")
|
|
||||||
result = strings.ReplaceAll(result, ">", ">")
|
|
||||||
result = strings.ReplaceAll(result, """, "\"")
|
|
||||||
result = strings.ReplaceAll(result, "'", "'")
|
|
||||||
result = strings.ReplaceAll(result, "
", "\n")
|
|
||||||
result = strings.ReplaceAll(result, "	", "\t")
|
|
||||||
|
|
||||||
return strings.TrimSpace(result)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsOfficeFile returns true for formats with extractable XML text.
|
// IsOfficeFile returns true for formats with extractable XML text.
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,891 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"archive/zip"
|
||||||
|
"bytes"
|
||||||
|
"crypto/sha256"
|
||||||
|
"encoding/base64"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"log"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Fireworks AI API configuration.
var (
	// fireworksAPIKey is resolved in init: environment variable first,
	// then a .env file fallback. Empty means "not configured".
	fireworksAPIKey string
	// fireworksBaseURL is the OpenAI-compatible Fireworks inference endpoint.
	fireworksBaseURL = "https://api.fireworks.ai/inference/v1"
)
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
fireworksAPIKey = os.Getenv("FIREWORKS_API_KEY")
|
||||||
|
if fireworksAPIKey == "" {
|
||||||
|
// Try .env file in docsys directory
|
||||||
|
envPath := filepath.Join(os.Getenv("HOME"), "dev/docsys/.env")
|
||||||
|
if data, err := os.ReadFile(envPath); err == nil {
|
||||||
|
for _, line := range strings.Split(string(data), "\n") {
|
||||||
|
if strings.HasPrefix(line, "FIREWORKS_API_KEY=") {
|
||||||
|
fireworksAPIKey = strings.TrimSpace(strings.TrimPrefix(line, "FIREWORKS_API_KEY="))
|
||||||
|
fireworksAPIKey = strings.Trim(fireworksAPIKey, `"'`)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// DocumentAnalysis contains the AI-extracted information
// parsed from the model's JSON response (see callFireworks).
type DocumentAnalysis struct {
	Category string      `json:"category"`  // one of the fixed category set; normalized in callFireworks
	DocType  string      `json:"doc_type"`  // specific type, e.g. "utility_bill", "receipt"
	Date     string      `json:"date"`      // document date, YYYY-MM-DD when the model can determine it
	Vendor   string      `json:"vendor"`    // company/organization name
	Amount   interface{} `json:"amount"`    // Can be string or number
	Title    string      `json:"title"`     // short English title (prompt asks for max 8 words)
	Summary  string      `json:"summary"`   // 1-2 sentence English description
	FullText string      `json:"full_text"` // verbatim Markdown transcription of the document
}
|
||||||
|
|
||||||
|
func (d *DocumentAnalysis) AmountString() string {
|
||||||
|
switch v := d.Amount.(type) {
|
||||||
|
case string:
|
||||||
|
return v
|
||||||
|
case float64:
|
||||||
|
return fmt.Sprintf("$%.2f", v)
|
||||||
|
default:
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// FileHash returns first 16 chars of SHA256 hash of the file's contents,
// or an error if the file cannot be opened or read.
// The parameter was renamed from "filepath", which shadowed the
// path/filepath package.
func FileHash(path string) (string, error) {
	f, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer f.Close()

	h := sha256.New()
	if _, err := io.Copy(h, f); err != nil {
		return "", err
	}
	// 16 hex chars = 64 bits of the digest; enough for dedupe keys here.
	return fmt.Sprintf("%x", h.Sum(nil))[:16], nil
}
|
||||||
|
|
||||||
|
// ConvertToImage converts PDF/Office docs to PNG for vision API.
// Office formats go through libreoffice to PDF first; PDFs are rendered
// (first page only, 150 dpi) with pdftoppm; anything else is assumed to be
// an image and read back unchanged.
func ConvertToImage(filePath string) ([]byte, error) {
	ext := strings.ToLower(filepath.Ext(filePath))

	// Office documents → PDF first
	officeExts := map[string]bool{".doc": true, ".docx": true, ".odt": true, ".rtf": true, ".xls": true, ".xlsx": true, ".ppt": true, ".pptx": true}
	if officeExts[ext] {
		tmpDir, err := os.MkdirTemp("", "docsys")
		if err != nil {
			return nil, err
		}
		defer os.RemoveAll(tmpDir)

		// CombinedOutput (not Run) so a failed conversion reports
		// libreoffice's own diagnostics, not just an exit status.
		cmd := exec.Command("libreoffice", "--headless", "--convert-to", "pdf", "--outdir", tmpDir, filePath)
		if out, err := cmd.CombinedOutput(); err != nil {
			return nil, fmt.Errorf("libreoffice conversion failed: %w (%s)", err, strings.TrimSpace(string(out)))
		}

		// libreoffice writes <basename>.pdf into the outdir.
		base := strings.TrimSuffix(filepath.Base(filePath), ext)
		filePath = filepath.Join(tmpDir, base+".pdf")
		ext = ".pdf"
	}

	// PDF → PNG (first page only for preview, full processing done separately)
	if ext == ".pdf" {
		tmpDir, err := os.MkdirTemp("", "docsys")
		if err != nil {
			return nil, err
		}
		defer os.RemoveAll(tmpDir)

		// Convert first page for initial analysis
		outPrefix := filepath.Join(tmpDir, "page")
		cmd := exec.Command("pdftoppm", "-png", "-f", "1", "-l", "1", "-r", "150", filePath, outPrefix)
		if out, err := cmd.CombinedOutput(); err != nil {
			return nil, fmt.Errorf("pdftoppm failed: %w (%s)", err, strings.TrimSpace(string(out)))
		}

		// pdftoppm names the output <prefix>-<page>.png.
		return os.ReadFile(filepath.Join(tmpDir, "page-1.png"))
	}

	// Image files — read directly
	return os.ReadFile(filePath)
}
|
||||||
|
|
||||||
|
// IsTextFile returns true for plain text files, identified by a lowercase
// extension including the leading dot (e.g. ".txt").
func IsTextFile(ext string) bool {
	// A switch over constant cases avoids allocating a fresh lookup map on
	// every call, as the original did.
	switch ext {
	case ".txt", ".md", ".markdown", ".text", ".log",
		".json", ".xml", ".csv", ".yaml", ".yml":
		return true
	}
	return false
}
|
||||||
|
|
||||||
|
// AnalyzeWithVision uses K2.5 vision model
// (qwen3-vl-30b-a3b-instruct on Fireworks) to transcribe and classify a
// single document image supplied as raw PNG/JPEG bytes. If the first call
// fails, it retries once with a minimal prompt before giving up.
func AnalyzeWithVision(imageData []byte) (*DocumentAnalysis, error) {
	if fireworksAPIKey == "" {
		return nil, fmt.Errorf("FIREWORKS_API_KEY not set")
	}

	// The image travels inline as a base64 data: URL.
	b64 := base64.StdEncoding.EncodeToString(imageData)

	prompt := `Analyze this document image and extract:

1. **Full Text**: Transcribe ALL visible text EXACTLY as it appears — in the ORIGINAL language of the document. DO NOT translate. DO NOT summarize. Transcribe word-for-word in the source language (Russian, Dutch, German, French, etc.) formatted as clean Markdown:
- Use headers (##) for sections
- Use **bold** for labels/field names
- Use tables for tabular data (items, prices, etc.)
- Use bullet lists where appropriate
- Preserve ALL numbers, dates, amounts, and codes exactly as shown

2. **Classification**: Categorize into exactly ONE of:
taxes, bills, medical, insurance, legal, financial, expenses, vehicles, home, personal, contacts, uncategorized

3. **Document Type**: Specific type (e.g., "utility_bill", "receipt", "tax_form_w2")

4. **Key Fields** (these may be in English for searchability):
- date: Document date (YYYY-MM-DD if possible)
- vendor: Company/organization name
- amount: Dollar/currency amount if present (e.g., "$123.45" or "809,400 BYN")

5. **Title**: Specific English title (max 8 words) that identifies THIS document uniquely. Include the key distinguishing details: who sent it, what it's about, and when. Bad: "Financial Report" or "Invoice". Good: "N-able Technology Exchange Rate Loss Explanation Feb 2025" or "Duke Energy Electric Bill Oct 2024" or "IRS Form 1099-INT Chase Bank 2024" or "BayCare HomeCare Invoice $340 Nov 2025". Never use generic words like "Document", "Letter", "Report" alone — always qualify them.

6. **Summary**: 1-2 sentence English description with key details.

IMPORTANT: The full_text field MUST contain the verbatim transcription in the document's original language. This is non-negotiable.

**Known proper nouns** (use these exact spellings when you see similar handwriting):
- Jongsma (surname — may look like "Jongoma", "Jongsoma", "Jongma")
- Johan (first name)
- Tatyana / Tanya (first name)
- St. Petersburg, Florida (city — not Russia)

Respond in JSON ONLY:
{"category": "...", "doc_type": "...", "date": "...", "vendor": "...", "amount": "...", "title": "...", "summary": "...", "full_text": "..."}`

	reqBody := map[string]interface{}{
		"model":      "accounts/fireworks/models/qwen3-vl-30b-a3b-instruct",
		"max_tokens": 4096,
		"messages": []map[string]interface{}{
			// System message pins raw-JSON output to suppress chain-of-thought
			// and code fences in the reply.
			{"role": "system", "content": "You are a document analysis API. Output ONLY raw JSON. No thinking, no commentary, no code fences. First character must be {, last character must be }."},
			{
				"role": "user",
				"content": []map[string]interface{}{
					{"type": "image_url", "image_url": map[string]string{"url": "data:image/png;base64," + b64}},
					{"type": "text", "text": prompt},
				},
			},
		},
	}

	analysis, err := callFireworks(reqBody)
	if err != nil {
		// Retry once with minimal prompt to avoid triggering extended reasoning
		log.Printf("  [AI] First attempt failed, retrying with simplified prompt...")
		retryBody := map[string]interface{}{
			"model":      "accounts/fireworks/models/qwen3-vl-30b-a3b-instruct",
			"max_tokens": 4096,
			"messages": []map[string]interface{}{
				{"role": "system", "content": "Output valid JSON only. No other text."},
				{
					"role": "user",
					"content": []map[string]interface{}{
						{"type": "image_url", "image_url": map[string]string{"url": "data:image/png;base64," + b64}},
						{"type": "text", "text": `Look at this document image. Transcribe ALL text verbatim (original language, do NOT translate). Return ONLY this JSON with no placeholders:
{"category":"","doc_type":"","date":"","vendor":"","amount":"","title":"","summary":"","full_text":""}`},
					},
				},
			},
		}
		// Note: a retry failure is returned as-is; the first error is dropped.
		return callFireworks(retryBody)
	}
	return analysis, nil
}
|
||||||
|
|
||||||
|
// AnalyzeText uses K2 text model for plain text files
|
||||||
|
func AnalyzeText(text, filename string) (*DocumentAnalysis, error) {
|
||||||
|
if fireworksAPIKey == "" {
|
||||||
|
return nil, fmt.Errorf("FIREWORKS_API_KEY not set")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Truncate long text
|
||||||
|
if len(text) > 50000 {
|
||||||
|
text = text[:50000]
|
||||||
|
}
|
||||||
|
|
||||||
|
prompt := fmt.Sprintf(`Analyze this document:
|
||||||
|
|
||||||
|
**Filename:** %s
|
||||||
|
|
||||||
|
**Content:**
|
||||||
|
%s
|
||||||
|
|
||||||
|
Categorize into ONE of: taxes, bills, medical, insurance, legal, financial, expenses, vehicles, home, personal, contacts, uncategorized
|
||||||
|
|
||||||
|
Respond in JSON ONLY:
|
||||||
|
{"category": "...", "doc_type": "...", "date": "...", "vendor": "...", "amount": "...", "summary": "..."}`, filename, text)
|
||||||
|
|
||||||
|
reqBody := map[string]interface{}{
|
||||||
|
"model": "accounts/fireworks/models/kimi-k2-instruct-0905",
|
||||||
|
"max_tokens": 1024,
|
||||||
|
"messages": []map[string]interface{}{
|
||||||
|
{"role": "user", "content": prompt},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
analysis, err := callFireworks(reqBody)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
analysis.FullText = text
|
||||||
|
return analysis, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func callFireworks(reqBody map[string]interface{}) (*DocumentAnalysis, error) {
|
||||||
|
jsonBody, _ := json.Marshal(reqBody)
|
||||||
|
|
||||||
|
req, _ := http.NewRequest("POST", fireworksBaseURL+"/chat/completions", bytes.NewReader(jsonBody))
|
||||||
|
req.Header.Set("Authorization", "Bearer "+fireworksAPIKey)
|
||||||
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
|
||||||
|
client := &http.Client{Timeout: 120 * time.Second}
|
||||||
|
resp, err := client.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != 200 {
|
||||||
|
body, _ := io.ReadAll(resp.Body)
|
||||||
|
return nil, fmt.Errorf("API error %d: %s", resp.StatusCode, string(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
respBody, _ := io.ReadAll(resp.Body)
|
||||||
|
|
||||||
|
var result struct {
|
||||||
|
Choices []struct {
|
||||||
|
Message struct {
|
||||||
|
Content string `json:"content"`
|
||||||
|
ReasoningContent string `json:"reasoning_content"`
|
||||||
|
} `json:"message"`
|
||||||
|
} `json:"choices"`
|
||||||
|
}
|
||||||
|
if err := json.Unmarshal(respBody, &result); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(result.Choices) == 0 {
|
||||||
|
return nil, fmt.Errorf("no response from API")
|
||||||
|
}
|
||||||
|
|
||||||
|
content := result.Choices[0].Message.Content
|
||||||
|
reasoning := result.Choices[0].Message.ReasoningContent
|
||||||
|
|
||||||
|
// K2.5 reasoning mode: actual JSON may be in content or reasoning_content
|
||||||
|
// Try content first, if it doesn't look like JSON, try reasoning_content
|
||||||
|
if !strings.Contains(content, "{") && reasoning != "" && strings.Contains(reasoning, "{") {
|
||||||
|
log.Printf(" [AI] Using reasoning_content (content had no JSON)")
|
||||||
|
content = reasoning
|
||||||
|
}
|
||||||
|
|
||||||
|
// Strip markdown code fences (```json ... ``` or ``` ... ```)
|
||||||
|
content = strings.TrimSpace(content)
|
||||||
|
if strings.HasPrefix(content, "```") {
|
||||||
|
// Remove opening fence (```json or ```)
|
||||||
|
if idx := strings.Index(content, "\n"); idx >= 0 {
|
||||||
|
content = content[idx+1:]
|
||||||
|
}
|
||||||
|
// Remove closing fence
|
||||||
|
if idx := strings.LastIndex(content, "```"); idx >= 0 {
|
||||||
|
content = content[:idx]
|
||||||
|
}
|
||||||
|
content = strings.TrimSpace(content)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract JSON from response
|
||||||
|
if idx := strings.Index(content, "{"); idx >= 0 {
|
||||||
|
if end := strings.LastIndex(content, "}"); end > idx {
|
||||||
|
content = content[idx : end+1]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var analysis DocumentAnalysis
|
||||||
|
if err := json.Unmarshal([]byte(content), &analysis); err != nil {
|
||||||
|
// Last resort: try to find a JSON object with braces matching
|
||||||
|
cleaned := extractJSONObject(content)
|
||||||
|
if cleaned != "" {
|
||||||
|
if err2 := json.Unmarshal([]byte(cleaned), &analysis); err2 != nil {
|
||||||
|
log.Printf(" [AI debug] Failed to parse even after cleanup. Content starts: %.200s", content)
|
||||||
|
return nil, fmt.Errorf("failed to parse response: %w", err)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
log.Printf(" [AI debug] No JSON object found in response. Content starts: %.200s", content)
|
||||||
|
return nil, fmt.Errorf("failed to parse response: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validate category
|
||||||
|
validCats := map[string]bool{"taxes": true, "bills": true, "medical": true, "insurance": true, "legal": true, "financial": true, "expenses": true, "vehicles": true, "home": true, "personal": true, "contacts": true, "uncategorized": true}
|
||||||
|
if !validCats[analysis.Category] {
|
||||||
|
analysis.Category = "uncategorized"
|
||||||
|
}
|
||||||
|
|
||||||
|
return &analysis, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractJSONObject tries to find a balanced JSON object in a string.
// It scans from the first '{', tracking string literals and backslash
// escapes so braces inside strings don't count, and returns the substring
// through the matching '}', or "" when no balanced object exists.
func extractJSONObject(s string) string {
	start := strings.Index(s, "{")
	if start < 0 {
		return ""
	}

	var (
		depth    int
		inStr    bool
		skipNext bool // previous byte was a backslash inside a string
	)
	for i := start; i < len(s); i++ {
		if skipNext {
			skipNext = false
			continue
		}
		switch c := s[i]; {
		case c == '\\' && inStr:
			skipNext = true
		case c == '"':
			inStr = !inStr
		case inStr:
			// structural characters inside string literals are data
		case c == '{':
			depth++
		case c == '}':
			depth--
			if depth == 0 {
				return s[start : i+1]
			}
		}
	}
	return ""
}
|
||||||
|
|
||||||
|
// GenerateEmbedding creates a vector embedding using Fireworks
|
||||||
|
func GenerateEmbedding(text string) ([]float32, error) {
|
||||||
|
if fireworksAPIKey == "" {
|
||||||
|
return nil, fmt.Errorf("FIREWORKS_API_KEY not set")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Truncate
|
||||||
|
if len(text) > 32000 {
|
||||||
|
text = text[:32000]
|
||||||
|
}
|
||||||
|
|
||||||
|
reqBody := map[string]interface{}{
|
||||||
|
"model": "fireworks/qwen3-embedding-8b",
|
||||||
|
"input": text,
|
||||||
|
}
|
||||||
|
jsonBody, _ := json.Marshal(reqBody)
|
||||||
|
|
||||||
|
req, _ := http.NewRequest("POST", fireworksBaseURL+"/embeddings", bytes.NewReader(jsonBody))
|
||||||
|
req.Header.Set("Authorization", "Bearer "+fireworksAPIKey)
|
||||||
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
|
||||||
|
client := &http.Client{Timeout: 30 * time.Second}
|
||||||
|
resp, err := client.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != 200 {
|
||||||
|
body, _ := io.ReadAll(resp.Body)
|
||||||
|
return nil, fmt.Errorf("embedding API error %d: %s", resp.StatusCode, string(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
var result struct {
|
||||||
|
Data []struct {
|
||||||
|
Embedding []float32 `json:"embedding"`
|
||||||
|
} `json:"data"`
|
||||||
|
}
|
||||||
|
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(result.Data) == 0 {
|
||||||
|
return nil, fmt.Errorf("no embedding returned")
|
||||||
|
}
|
||||||
|
|
||||||
|
return result.Data[0].Embedding, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetPDFPageCount returns the number of pages in a PDF by parsing
// pdfinfo's "Pages:" line. It falls back to 1 when pdfinfo is missing,
// the file is unreadable, or the line cannot be parsed — so callers
// always get a usable loop bound. (The original ignored Sscanf's result
// and could return 0 for a malformed line.)
func GetPDFPageCount(filePath string) int {
	out, err := exec.Command("pdfinfo", filePath).Output()
	if err != nil {
		return 1
	}
	for _, line := range strings.Split(string(out), "\n") {
		if !strings.HasPrefix(line, "Pages:") {
			continue
		}
		var count int
		if n, err := fmt.Sscanf(line, "Pages: %d", &count); err != nil || n != 1 || count < 1 {
			return 1
		}
		return count
	}
	return 1
}
|
||||||
|
|
||||||
|
// ProcessPDFPageByPage extracts text from each page separately
// by rendering each page to PNG (pdftoppm, 150 dpi) and OCR-ing it via
// AnalyzePageOnly. Per-page failures are logged and skipped (best-effort),
// so the returned text may be partial; the error result is currently
// always nil. Pages are joined with "--- ## Page N" Markdown separators.
func ProcessPDFPageByPage(filePath string, jobID string) (string, error) {
	pageCount := GetPDFPageCount(filePath)
	log.Printf("  Processing %d pages separately...", pageCount)

	var allText strings.Builder

	for page := 1; page <= pageCount; page++ {
		// Surface progress to the job tracker before the slow work.
		UpdateJob(jobID, "ocr", fmt.Sprintf("Page %d/%d", page, pageCount))
		tmpDir, err := os.MkdirTemp("", "docsys-page")
		if err != nil {
			continue
		}

		// Convert single page to PNG
		outPrefix := filepath.Join(tmpDir, "page")
		cmd := exec.Command("pdftoppm", "-png", "-f", fmt.Sprintf("%d", page), "-l", fmt.Sprintf("%d", page), "-r", "150", filePath, outPrefix)
		if err := cmd.Run(); err != nil {
			os.RemoveAll(tmpDir)
			continue
		}

		// pdftoppm names its output <prefix>-<page>.png.
		pngPath := filepath.Join(tmpDir, fmt.Sprintf("page-%d.png", page))
		imageData, err := os.ReadFile(pngPath)
		// Cleaned up eagerly (not deferred) so temp dirs don't accumulate
		// across all pages until function return.
		os.RemoveAll(tmpDir)
		if err != nil {
			continue
		}

		// OCR this page
		log.Printf("  Page %d/%d...", page, pageCount)
		pageAnalysis, err := AnalyzePageOnly(imageData, page)
		if err != nil {
			log.Printf("  Page %d failed: %v", page, err)
			continue
		}

		if pageAnalysis != "" {
			allText.WriteString(fmt.Sprintf("\n\n---\n## Page %d\n\n", page))
			allText.WriteString(pageAnalysis)
		}
	}

	return allText.String(), nil
}
|
||||||
|
|
||||||
|
// AnalyzePageOnly extracts just the text from a single page image
|
||||||
|
func AnalyzePageOnly(imageData []byte, pageNum int) (string, error) {
|
||||||
|
if fireworksAPIKey == "" {
|
||||||
|
return "", fmt.Errorf("FIREWORKS_API_KEY not set")
|
||||||
|
}
|
||||||
|
|
||||||
|
b64 := base64.StdEncoding.EncodeToString(imageData)
|
||||||
|
|
||||||
|
prompt := `Transcribe ALL visible text on this page EXACTLY as it appears, in the ORIGINAL language of the document. DO NOT translate. DO NOT summarize. Output ONLY the transcribed text — no commentary, no analysis, no preamble, no "The document is..." sentences. Start directly with the content.
|
||||||
|
|
||||||
|
FORMAT: Use ### for sections, **bold** for labels, markdown tables for tabular data, - bullets for lists. Preserve ALL numbers, dates, amounts, and values exactly as shown. If the document is in Russian, Dutch, German, French, or any other language — keep it in that language.`
|
||||||
|
|
||||||
|
reqBody := map[string]interface{}{
|
||||||
|
"model": "accounts/fireworks/models/qwen3-vl-30b-a3b-instruct",
|
||||||
|
"max_tokens": 4096,
|
||||||
|
"messages": []map[string]interface{}{
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": []map[string]interface{}{
|
||||||
|
{"type": "image_url", "image_url": map[string]string{"url": "data:image/png;base64," + b64}},
|
||||||
|
{"type": "text", "text": prompt},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
jsonBody, _ := json.Marshal(reqBody)
|
||||||
|
req, _ := http.NewRequest("POST", fireworksBaseURL+"/chat/completions", bytes.NewReader(jsonBody))
|
||||||
|
req.Header.Set("Authorization", "Bearer "+fireworksAPIKey)
|
||||||
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
|
||||||
|
client := &http.Client{Timeout: 120 * time.Second}
|
||||||
|
resp, err := client.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != 200 {
|
||||||
|
body, _ := io.ReadAll(resp.Body)
|
||||||
|
return "", fmt.Errorf("API error %d: %s", resp.StatusCode, string(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read raw response to debug content vs reasoning_content
|
||||||
|
rawBody, err := io.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
var result struct {
|
||||||
|
Choices []struct {
|
||||||
|
Message struct {
|
||||||
|
Content string `json:"content"`
|
||||||
|
ReasoningContent string `json:"reasoning_content"`
|
||||||
|
} `json:"message"`
|
||||||
|
} `json:"choices"`
|
||||||
|
}
|
||||||
|
if err := json.Unmarshal(rawBody, &result); err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(result.Choices) == 0 {
|
||||||
|
return "", fmt.Errorf("no response")
|
||||||
|
}
|
||||||
|
|
||||||
|
content := result.Choices[0].Message.Content
|
||||||
|
reasoning := result.Choices[0].Message.ReasoningContent
|
||||||
|
|
||||||
|
if reasoning != "" {
|
||||||
|
log.Printf(" [OCR debug] reasoning_content length: %d, content length: %d", len(reasoning), len(content))
|
||||||
|
if len(content) > 100 {
|
||||||
|
log.Printf(" [OCR debug] content starts: %.100s", content)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If content is empty but reasoning has text, model put everything in wrong field
|
||||||
|
if strings.TrimSpace(content) == "" && reasoning != "" {
|
||||||
|
log.Printf(" [OCR debug] WARNING: content empty, using reasoning_content")
|
||||||
|
content = reasoning
|
||||||
|
}
|
||||||
|
|
||||||
|
return strings.TrimSpace(content), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ProcessDocument handles the full document processing pipeline for a single
// inbox file: hash → dedupe check → content extraction/classification (plain
// text, Office ZIP, or vision OCR path) → copy into the store → DB insert →
// embedding → inbox cleanup. The content hash doubles as the document ID and
// the progress-job ID. On success the inbox file is removed.
func ProcessDocument(filePath string) (*Document, error) {
	log.Printf("Processing: %s", filepath.Base(filePath))

	ext := strings.ToLower(filepath.Ext(filePath))

	// Get file hash
	hash, err := FileHash(filePath)
	if err != nil {
		return nil, fmt.Errorf("hash failed: %w", err)
	}
	log.Printf(" Hash: %s", hash)

	// Start progress tracking; FinishJob always runs, even on error returns.
	StartJob(hash, filepath.Base(filePath))
	defer FinishJob(hash)

	// Check if already fully processed (not pending). Re-uploaded identical
	// content is deduplicated by hash and the inbox copy discarded.
	if existing, _ := GetDocument(hash); existing != nil && existing.Status == "ready" {
		log.Printf(" Already exists, skipping")
		os.Remove(filePath)
		return existing, nil
	}

	var analysis *DocumentAnalysis

	if IsTextFile(ext) {
		// Plain text — read and analyze directly, no conversion step.
		data, err := os.ReadFile(filePath)
		if err != nil {
			return nil, err
		}
		UpdateJob(hash, "classifying", "Analyzing text...")
		log.Printf(" Analyzing text with K2...")
		analysis, err = AnalyzeText(string(data), filepath.Base(filePath))
		if err != nil {
			UpdateJob(hash, "error", err.Error())
			return nil, fmt.Errorf("text analysis failed: %w", err)
		}
	} else if IsOfficeFile(ext) {
		// Office formats — extract text natively from ZIP/XML, no LibreOffice needed
		UpdateJob(hash, "converting", "Extracting text...")
		log.Printf(" Extracting text from %s...", ext)
		text, err := ExtractOfficeText(filePath)
		if err != nil {
			UpdateJob(hash, "error", err.Error())
			return nil, fmt.Errorf("office text extraction failed: %w", err)
		}
		log.Printf(" Extracted %d chars, classifying...", len(text))
		UpdateJob(hash, "classifying", "Classifying...")
		analysis, err = AnalyzeText(text, filepath.Base(filePath))
		if err != nil {
			UpdateJob(hash, "error", err.Error())
			return nil, fmt.Errorf("text analysis failed: %w", err)
		}
	} else {
		// Vision — convert to image and analyze (PDFs and image formats).
		UpdateJob(hash, "converting", "Converting to image...")
		log.Printf(" Converting to image...")
		imageData, err := ConvertToImage(filePath)
		if err != nil {
			UpdateJob(hash, "error", err.Error())
			return nil, fmt.Errorf("image conversion failed: %w", err)
		}
		UpdateJob(hash, "ocr", "Analyzing first page...")
		log.Printf(" Analyzing with K2.5 vision...")
		analysis, err = AnalyzeWithVision(imageData)
		if err != nil {
			// Vision JSON extraction failed — fall back to two-step: plain-text OCR + text classifier
			log.Printf(" AnalyzeWithVision failed (%v), falling back to OCR+classify...", err)
			UpdateJob(hash, "ocr", "Falling back to OCR + classify...")
			pageText, ocrErr := AnalyzePageOnly(imageData, 1)
			if ocrErr != nil {
				// Both the primary and the fallback failed — report both.
				UpdateJob(hash, "error", ocrErr.Error())
				return nil, fmt.Errorf("vision analysis failed (primary: %v, fallback: %w)", err, ocrErr)
			}
			// Classify the extracted text
			log.Printf(" OCR succeeded (%d chars), classifying...", len(pageText))
			UpdateJob(hash, "classifying", "Classifying extracted text...")
			analysis, err = AnalyzeText(pageText, filepath.Base(filePath))
			if err != nil {
				// Use minimal stub so at least the doc is stored with its text
				log.Printf(" Classification failed too: %v — storing with minimal metadata", err)
				analysis = &DocumentAnalysis{
					Category: "uncategorized",
					DocType:  "unknown",
					Title:    strings.TrimSuffix(filepath.Base(filePath), filepath.Ext(filePath)),
					Summary:  "Extraction failed — stored raw text only",
					FullText: pageText,
				}
			} else {
				analysis.FullText = pageText
			}
		}

		// For PDFs, process pages for accurate OCR
		if ext == ".pdf" {
			pageCount := GetPDFPageCount(filePath)
			if pageCount > 1 {
				// Multi-page: per-page OCR replaces whatever full_text the
				// single-image pass produced, but only if it succeeded.
				log.Printf(" Multi-page PDF detected (%d pages)", pageCount)
				UpdateJob(hash, "ocr", fmt.Sprintf("Multi-page PDF: %d pages", pageCount))
				fullText, err := ProcessPDFPageByPage(filePath, hash)
				if err == nil && fullText != "" {
					analysis.FullText = fullText
				}
			} else if analysis.FullText == "" || len(analysis.FullText) < 50 || strings.HasPrefix(analysis.FullText, "[") {
				// Single-page but full_text is empty/placeholder — retry with AnalyzePageOnly.
				// NOTE(review): the "[" prefix check presumably catches model
				// placeholder output like "[no text]" — confirm with callers.
				log.Printf(" Single-page PDF with bad full_text (%q) — retrying with AnalyzePageOnly...", analysis.FullText)
				UpdateJob(hash, "ocr", "Retrying text extraction...")
				if pageText, err := AnalyzePageOnly(imageData, 1); err == nil && pageText != "" {
					analysis.FullText = pageText
				} else if err != nil {
					log.Printf(" AnalyzePageOnly fallback failed: %v", err)
				}
			}
		}
	}

	log.Printf(" Category: %s, Type: %s", analysis.Category, analysis.DocType)

	// Copy to store, named by content hash plus the original extension.
	storePath := filepath.Join(storeDir, hash+ext)
	if err := copyFile(filePath, storePath); err != nil {
		return nil, fmt.Errorf("store copy failed: %w", err)
	}

	// Create document record
	// Use title if provided, fall back to summary
	title := analysis.Title
	if title == "" {
		title = analysis.Summary
	}

	doc := &Document{
		ID:           hash,
		Title:        title,
		Category:     analysis.Category,
		Type:         analysis.DocType,
		Date:         analysis.Date,
		Amount:       analysis.AmountString(),
		Vendor:       analysis.Vendor,
		Summary:      analysis.Summary,
		FullText:     analysis.FullText,
		PDFPath:      storePath,
		OriginalFile: filepath.Base(filePath),
		ProcessedAt:  time.Now().Format(time.RFC3339),
		Status:       "ready",
	}

	// Save to database
	if err := InsertDocument(doc); err != nil {
		return nil, fmt.Errorf("db insert failed: %w", err)
	}

	// Generate embedding for semantic search; failures are logged but do not
	// fail the pipeline — the document is already stored and queryable.
	if analysis.FullText != "" {
		UpdateJob(hash, "embedding", "Generating search index...")
		log.Printf(" Generating embedding...")
		if emb, err := GenerateEmbedding(analysis.FullText); err == nil {
			log.Printf(" Embedding: %d dimensions", len(emb))
			StoreEmbedding(hash, emb)
		} else {
			log.Printf(" Embedding failed: %v", err)
		}
	}

	// Remove from inbox
	os.Remove(filePath)

	log.Printf(" ✓ Done: %s/%s", analysis.Category, hash)
	return doc, nil
}
|
||||||
|
|
||||||
|
// copyFile copies the file at src to dst, creating or truncating dst.
// Unlike the previous version, the Close error on dst is returned: data may
// only be flushed to disk at close time, so deferring and discarding it
// could silently report success on a partial write (e.g. disk full).
func copyFile(src, dst string) error {
	in, err := os.Open(src)
	if err != nil {
		return err
	}
	defer in.Close()

	out, err := os.Create(dst)
	if err != nil {
		return err
	}

	if _, err := io.Copy(out, in); err != nil {
		out.Close() // best-effort; the copy error is the primary failure
		return err
	}
	return out.Close()
}
|
||||||
|
|
||||||
|
// ExtractOfficeText extracts plain text from DOCX/XLSX/PPTX natively.
|
||||||
|
// These are ZIP archives containing XML — no LibreOffice needed.
|
||||||
|
func ExtractOfficeText(filePath string) (string, error) {
|
||||||
|
ext := strings.ToLower(filepath.Ext(filePath))
|
||||||
|
|
||||||
|
r, err := zip.OpenReader(filePath)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("not a valid Office file: %w", err)
|
||||||
|
}
|
||||||
|
defer r.Close()
|
||||||
|
|
||||||
|
// Which XML paths to extract per format
|
||||||
|
var targets []string
|
||||||
|
switch ext {
|
||||||
|
case ".docx", ".doc", ".odt", ".rtf":
|
||||||
|
targets = []string{"word/document.xml", "word/body.xml"}
|
||||||
|
case ".xlsx", ".xls":
|
||||||
|
// All sheets
|
||||||
|
for _, f := range r.File {
|
||||||
|
if strings.HasPrefix(f.Name, "xl/worksheets/sheet") && strings.HasSuffix(f.Name, ".xml") {
|
||||||
|
targets = append(targets, f.Name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Shared strings (cell values are stored here for xlsx)
|
||||||
|
targets = append(targets, "xl/sharedStrings.xml")
|
||||||
|
case ".pptx", ".ppt":
|
||||||
|
for _, f := range r.File {
|
||||||
|
if strings.HasPrefix(f.Name, "ppt/slides/slide") && strings.HasSuffix(f.Name, ".xml") {
|
||||||
|
targets = append(targets, f.Name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
return "", fmt.Errorf("unsupported office format: %s", ext)
|
||||||
|
}
|
||||||
|
|
||||||
|
var sb strings.Builder
|
||||||
|
for _, f := range r.File {
|
||||||
|
found := false
|
||||||
|
for _, t := range targets {
|
||||||
|
if f.Name == t {
|
||||||
|
found = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !found {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
rc, err := f.Open()
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
data, err := io.ReadAll(rc)
|
||||||
|
rc.Close()
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
sb.WriteString(xmlToText(data))
|
||||||
|
sb.WriteString("\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
text := strings.TrimSpace(sb.String())
|
||||||
|
if text == "" {
|
||||||
|
return "", fmt.Errorf("no text extracted from %s", filepath.Base(filePath))
|
||||||
|
}
|
||||||
|
return text, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// xmlToText strips XML tags from data and decodes common XML entities,
// returning the remaining character data as whitespace-normalized plain
// text. A single space is emitted at element boundaries so adjacent text
// runs don't fuse into one word.
func xmlToText(data []byte) string {
	var sb strings.Builder
	inTag := false
	lastWasSpace := false

	for i := 0; i < len(data); i++ {
		c := data[i]
		switch {
		case c == '<':
			inTag = true
			// Emit space between elements so words don't run together
			if !lastWasSpace {
				sb.WriteByte(' ')
				lastWasSpace = true
			}
		case c == '>':
			inTag = false
		case !inTag:
			if c == ' ' || c == '\t' || c == '\n' || c == '\r' {
				if !lastWasSpace {
					sb.WriteByte(' ')
					lastWasSpace = true
				}
			} else {
				sb.WriteByte(c)
				lastWasSpace = false
			}
		}
	}

	// Decode common XML entities in a single left-to-right pass.
	// BUG FIX: the previous sequential ReplaceAll chain decoded "&amp;"
	// first, so escaped input such as "&amp;lt;" was double-decoded into
	// "<". strings.NewReplacer scans the string exactly once, so each
	// entity is decoded at most once and "&amp;lt;" correctly yields "&lt;".
	decoder := strings.NewReplacer(
		"&amp;", "&",
		"&lt;", "<",
		"&gt;", ">",
		"&quot;", "\"",
		"&apos;", "'",
		"&#10;", "\n",
		"&#9;", "\t",
	)
	return strings.TrimSpace(decoder.Replace(sb.String()))
}
|
||||||
|
|
||||||
|
// IsOfficeFile reports whether ext (a lowercase extension including the
// leading dot, e.g. ".docx") is an Office-style ZIP format with
// extractable XML text.
func IsOfficeFile(ext string) bool {
	// A switch over the handful of constant keys avoids allocating a fresh
	// map literal on every call, which the previous implementation did.
	switch ext {
	case ".docx", ".xlsx", ".pptx", ".odt", ".ods", ".odp":
		return true
	}
	return false
}
|
||||||
101
main.go
101
main.go
|
|
@ -72,6 +72,10 @@ func main() {
|
||||||
"title": strings.Title,
|
"title": strings.Title,
|
||||||
"safe": func(s string) template.HTML { return template.HTML(s) },
|
"safe": func(s string) template.HTML { return template.HTML(s) },
|
||||||
"multiply": func(a float64, b float64) float64 { return a * b },
|
"multiply": func(a float64, b float64) float64 { return a * b },
|
||||||
|
"isImage": func(filename string) bool {
|
||||||
|
ext := strings.ToLower(filepath.Ext(filename))
|
||||||
|
return ext == ".jpg" || ext == ".jpeg" || ext == ".png" || ext == ".gif" || ext == ".webp" || ext == ".tiff"
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
r := chi.NewRouter()
|
r := chi.NewRouter()
|
||||||
|
|
@ -84,6 +88,7 @@ func main() {
|
||||||
|
|
||||||
// PDF serving
|
// PDF serving
|
||||||
r.Get("/pdf/{hash}", servePDF)
|
r.Get("/pdf/{hash}", servePDF)
|
||||||
|
r.Get("/img/{hash}", serveImage)
|
||||||
|
|
||||||
// Pages
|
// Pages
|
||||||
r.Get("/", dashboardHandler)
|
r.Get("/", dashboardHandler)
|
||||||
|
|
@ -323,6 +328,26 @@ func servePDF(w http.ResponseWriter, r *http.Request) {
|
||||||
http.Error(w, "File not found", http.StatusNotFound)
|
http.Error(w, "File not found", http.StatusNotFound)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func serveImage(w http.ResponseWriter, r *http.Request) {
|
||||||
|
hash := chi.URLParam(r, "hash")
|
||||||
|
mimeTypes := map[string]string{
|
||||||
|
".jpg": "image/jpeg", ".jpeg": "image/jpeg",
|
||||||
|
".png": "image/png", ".gif": "image/gif",
|
||||||
|
".webp": "image/webp", ".tiff": "image/tiff",
|
||||||
|
}
|
||||||
|
for ext, mime := range mimeTypes {
|
||||||
|
path := filepath.Join(storeDir, hash+ext)
|
||||||
|
if _, err := os.Stat(path); err == nil {
|
||||||
|
w.Header().Set("Content-Type", mime)
|
||||||
|
w.Header().Set("Cache-Control", "private, max-age=3600")
|
||||||
|
http.ServeFile(w, r, path)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
http.NotFound(w, r)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// sanitizeFilename removes characters unsafe for use in Content-Disposition filenames.
|
// sanitizeFilename removes characters unsafe for use in Content-Disposition filenames.
|
||||||
func sanitizeFilename(name string) string {
|
func sanitizeFilename(name string) string {
|
||||||
replacer := strings.NewReplacer(`"`, "'", "/", "-", "\\", "-", "\n", " ", "\r", "")
|
replacer := strings.NewReplacer(`"`, "'", "/", "-", "\\", "-", "\n", " ", "\r", "")
|
||||||
|
|
@ -444,6 +469,7 @@ func ingestHandler(w http.ResponseWriter, r *http.Request) {
|
||||||
var req struct {
|
var req struct {
|
||||||
Filename string `json:"filename"`
|
Filename string `json:"filename"`
|
||||||
Content string `json:"content"`
|
Content string `json:"content"`
|
||||||
|
URL string `json:"url"`
|
||||||
Source string `json:"source"`
|
Source string `json:"source"`
|
||||||
Subject string `json:"subject"`
|
Subject string `json:"subject"`
|
||||||
From string `json:"from"`
|
From string `json:"from"`
|
||||||
|
|
@ -454,16 +480,73 @@ func ingestHandler(w http.ResponseWriter, r *http.Request) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if req.Filename == "" || req.Content == "" {
|
var data []byte
|
||||||
http.Error(w, "filename and content are required", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Decode base64 content
|
if req.URL != "" {
|
||||||
data, err := base64.StdEncoding.DecodeString(req.Content)
|
// Fetch from URL
|
||||||
if err != nil {
|
resp, err := http.Get(req.URL)
|
||||||
http.Error(w, "Invalid base64 content", http.StatusBadRequest)
|
if err != nil {
|
||||||
return
|
http.Error(w, "Failed to fetch URL: "+err.Error(), http.StatusBadGateway)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
if resp.StatusCode >= 400 {
|
||||||
|
http.Error(w, fmt.Sprintf("URL returned %d", resp.StatusCode), http.StatusBadGateway)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
data, err = io.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to read URL content", http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Derive filename from URL or content-type if not provided
|
||||||
|
if req.Filename == "" {
|
||||||
|
ext := ""
|
||||||
|
ct := resp.Header.Get("Content-Type")
|
||||||
|
switch {
|
||||||
|
case strings.Contains(ct, "jpeg") || strings.Contains(ct, "jpg"):
|
||||||
|
ext = ".jpg"
|
||||||
|
case strings.Contains(ct, "png"):
|
||||||
|
ext = ".png"
|
||||||
|
case strings.Contains(ct, "gif"):
|
||||||
|
ext = ".gif"
|
||||||
|
case strings.Contains(ct, "webp"):
|
||||||
|
ext = ".webp"
|
||||||
|
case strings.Contains(ct, "pdf"):
|
||||||
|
ext = ".pdf"
|
||||||
|
case strings.Contains(ct, "tiff"):
|
||||||
|
ext = ".tiff"
|
||||||
|
default:
|
||||||
|
// Try to get extension from URL path
|
||||||
|
urlPath := resp.Request.URL.Path
|
||||||
|
if e := filepath.Ext(urlPath); e != "" {
|
||||||
|
ext = e
|
||||||
|
} else {
|
||||||
|
ext = ".bin"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Use last path segment of URL as base name
|
||||||
|
base := filepath.Base(resp.Request.URL.Path)
|
||||||
|
if base == "." || base == "/" {
|
||||||
|
base = "url-import"
|
||||||
|
}
|
||||||
|
if filepath.Ext(base) == "" {
|
||||||
|
base += ext
|
||||||
|
}
|
||||||
|
req.Filename = base
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if req.Filename == "" || req.Content == "" {
|
||||||
|
http.Error(w, "filename and content required, or provide url", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Decode base64 content
|
||||||
|
var err error
|
||||||
|
data, err = base64.StdEncoding.DecodeString(req.Content)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Invalid base64 content", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sanitize filename
|
// Sanitize filename
|
||||||
|
|
|
||||||
|
|
@ -120,7 +120,7 @@
|
||||||
{{if .Stats.RecentUploads}}
|
{{if .Stats.RecentUploads}}
|
||||||
<div class="divide-y divide-gray-100 dark:divide-gray-700">
|
<div class="divide-y divide-gray-100 dark:divide-gray-700">
|
||||||
{{range .Stats.RecentUploads}}
|
{{range .Stats.RecentUploads}}
|
||||||
<a href="/document/{{.ID}}" class="flex items-center px-5 py-4 hover:bg-gray-50 dark:hover:bg-gray-700/50 transition-colors group">
|
<div class="flex items-center px-5 py-4 hover:bg-gray-50 dark:hover:bg-gray-700/50 transition-colors group cursor-pointer" onclick="location.href='/document/{{.ID}}'">
|
||||||
<div class="flex-shrink-0 p-2 bg-gray-100 dark:bg-gray-700 rounded-lg mr-4">
|
<div class="flex-shrink-0 p-2 bg-gray-100 dark:bg-gray-700 rounded-lg mr-4">
|
||||||
<svg class="w-6 h-6 text-gray-500 dark:text-gray-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
<svg class="w-6 h-6 text-gray-500 dark:text-gray-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 12h6m-6 4h6m2 5H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z"></path>
|
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 12h6m-6 4h6m2 5H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z"></path>
|
||||||
|
|
@ -130,27 +130,34 @@
|
||||||
<p class="font-medium text-gray-900 dark:text-white truncate group-hover:text-brand-600 dark:group-hover:text-brand-400 transition-colors">{{.Title}}</p>
|
<p class="font-medium text-gray-900 dark:text-white truncate group-hover:text-brand-600 dark:group-hover:text-brand-400 transition-colors">{{.Title}}</p>
|
||||||
<p class="text-sm text-gray-500 dark:text-gray-400 truncate">{{truncate .Summary 100}}</p>
|
<p class="text-sm text-gray-500 dark:text-gray-400 truncate">{{truncate .Summary 100}}</p>
|
||||||
</div>
|
</div>
|
||||||
<div class="flex-shrink-0 ml-4 text-right">
|
<div class="flex-shrink-0 ml-4 flex items-center gap-3">
|
||||||
{{if eq .Status "processing"}}
|
<div class="text-right">
|
||||||
<span class="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-amber-100 dark:bg-amber-900/30 text-amber-700 dark:text-amber-300" data-processing="{{.ID}}">
|
{{if eq .Status "processing"}}
|
||||||
<svg class="animate-spin -ml-0.5 mr-1.5 h-3 w-3" fill="none" viewBox="0 0 24 24">
|
<span class="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-amber-100 dark:bg-amber-900/30 text-amber-700 dark:text-amber-300" data-processing="{{.ID}}">
|
||||||
<circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle>
|
<svg class="animate-spin -ml-0.5 mr-1.5 h-3 w-3" fill="none" viewBox="0 0 24 24">
|
||||||
<path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
|
<circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle>
|
||||||
|
<path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
|
||||||
|
</svg>
|
||||||
|
Processing
|
||||||
|
</span>
|
||||||
|
{{else if eq .Status "error"}}
|
||||||
|
<span class="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-red-100 dark:bg-red-900/30 text-red-700 dark:text-red-300">
|
||||||
|
Error
|
||||||
|
</span>
|
||||||
|
{{else}}
|
||||||
|
<span class="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-brand-100 dark:bg-brand-900/30 text-brand-700 dark:text-brand-300">
|
||||||
|
{{title .Category}}
|
||||||
|
</span>
|
||||||
|
{{end}}
|
||||||
|
<p class="text-xs text-gray-400 dark:text-gray-500 mt-1">{{formatDateTime .ProcessedAt}}</p>
|
||||||
|
</div>
|
||||||
|
<button onclick="event.stopPropagation(); deleteDoc('{{.ID}}', '{{.Title}}')" class="opacity-0 group-hover:opacity-100 p-1.5 text-gray-400 hover:text-red-600 hover:bg-red-50 dark:hover:bg-red-900/20 rounded-lg transition-all" title="Delete">
|
||||||
|
<svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
|
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16"></path>
|
||||||
</svg>
|
</svg>
|
||||||
Processing
|
</button>
|
||||||
</span>
|
|
||||||
{{else if eq .Status "error"}}
|
|
||||||
<span class="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-red-100 dark:bg-red-900/30 text-red-700 dark:text-red-300">
|
|
||||||
Error
|
|
||||||
</span>
|
|
||||||
{{else}}
|
|
||||||
<span class="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-brand-100 dark:bg-brand-900/30 text-brand-700 dark:text-brand-300">
|
|
||||||
{{title .Category}}
|
|
||||||
</span>
|
|
||||||
{{end}}
|
|
||||||
<p class="text-xs text-gray-400 dark:text-gray-500 mt-1">{{formatDateTime .ProcessedAt}}</p>
|
|
||||||
</div>
|
</div>
|
||||||
</a>
|
</div>
|
||||||
{{end}}
|
{{end}}
|
||||||
</div>
|
</div>
|
||||||
{{else}}
|
{{else}}
|
||||||
|
|
@ -292,6 +299,51 @@
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Clipboard paste — handles Ctrl+V of images/screenshots
|
||||||
|
document.addEventListener('paste', async (e) => {
|
||||||
|
const items = Array.from(e.clipboardData.items).filter(i => i.type.startsWith('image/'));
|
||||||
|
if (!items.length) return;
|
||||||
|
e.preventDefault();
|
||||||
|
progress.classList.remove('hidden');
|
||||||
|
const pendingIds = [];
|
||||||
|
for (const item of items) {
|
||||||
|
const file = item.getAsFile();
|
||||||
|
if (!file) continue;
|
||||||
|
const ext = item.type.split('/')[1] || 'png';
|
||||||
|
const name = `paste-${Date.now()}.${ext}`;
|
||||||
|
try {
|
||||||
|
const formData = new FormData();
|
||||||
|
formData.append('file', new File([file], name, { type: item.type }));
|
||||||
|
const res = await fetch('/api/upload', { method: 'POST', body: formData });
|
||||||
|
const data = await res.json();
|
||||||
|
if (data.status === 'success') {
|
||||||
|
showToast('✓ Pasted image', 'success');
|
||||||
|
pendingIds.push(data.id);
|
||||||
|
} else {
|
||||||
|
showToast('Paste failed', 'error');
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
showToast('Paste failed: ' + err.message, 'error');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (pendingIds.length > 0) {
|
||||||
|
sessionStorage.setItem('pendingDocs', JSON.stringify(pendingIds));
|
||||||
|
window.location.reload();
|
||||||
|
} else {
|
||||||
|
progress.classList.add('hidden');
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
async function deleteDoc(id, title) {
|
||||||
|
if (!confirm('Delete "' + title + '"?')) return;
|
||||||
|
const res = await fetch('/api/document/' + id, { method: 'DELETE' });
|
||||||
|
if (res.ok) {
|
||||||
|
window.location.reload();
|
||||||
|
} else {
|
||||||
|
alert('Failed to delete document');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Check for pending docs on page load (from web upload)
|
// Check for pending docs on page load (from web upload)
|
||||||
const pendingDocsJson = sessionStorage.getItem('pendingDocs');
|
const pendingDocsJson = sessionStorage.getItem('pendingDocs');
|
||||||
if (pendingDocsJson) {
|
if (pendingDocsJson) {
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,12 @@
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="flex gap-2">
|
<div class="flex gap-2">
|
||||||
|
<button onclick="deleteDocument()" class="inline-flex items-center px-3 py-2 bg-white dark:bg-gray-800 text-red-600 dark:text-red-400 text-sm font-medium rounded-lg border border-red-200 dark:border-red-800 hover:bg-red-50 dark:hover:bg-red-900/20 transition-all">
|
||||||
|
<svg class="w-4 h-4 mr-1.5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
|
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16"></path>
|
||||||
|
</svg>
|
||||||
|
Delete
|
||||||
|
</button>
|
||||||
<button onclick="toggleEdit()" class="inline-flex items-center px-3 py-2 bg-white dark:bg-gray-800 text-gray-700 dark:text-gray-200 text-sm font-medium rounded-lg border border-gray-300 dark:border-gray-600 hover:bg-gray-50 dark:hover:bg-gray-700 transition-all">
|
<button onclick="toggleEdit()" class="inline-flex items-center px-3 py-2 bg-white dark:bg-gray-800 text-gray-700 dark:text-gray-200 text-sm font-medium rounded-lg border border-gray-300 dark:border-gray-600 hover:bg-gray-50 dark:hover:bg-gray-700 transition-all">
|
||||||
<svg class="w-4 h-4 mr-1.5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
<svg class="w-4 h-4 mr-1.5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M11 5H6a2 2 0 00-2 2v11a2 2 0 002 2h11a2 2 0 002-2v-5m-1.414-9.414a2 2 0 112.828 2.828L11.828 15H9v-2.828l8.586-8.586z"></path>
|
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M11 5H6a2 2 0 00-2 2v11a2 2 0 002 2h11a2 2 0 002-2v-5m-1.414-9.414a2 2 0 112.828 2.828L11.828 15H9v-2.828l8.586-8.586z"></path>
|
||||||
|
|
@ -163,7 +169,16 @@
|
||||||
{{end}}
|
{{end}}
|
||||||
|
|
||||||
<div>
|
<div>
|
||||||
{{if .Document.PDFPath}}
|
{{if isImage .Document.OriginalFile}}
|
||||||
|
<div class="bg-white dark:bg-gray-800 rounded-2xl shadow-sm border border-gray-100 dark:border-gray-700 overflow-hidden">
|
||||||
|
<div class="px-6 py-4 border-b border-gray-100 dark:border-gray-700">
|
||||||
|
<h2 class="font-semibold text-gray-900 dark:text-white">Preview</h2>
|
||||||
|
</div>
|
||||||
|
<div class="p-4 flex justify-center">
|
||||||
|
<img src="/img/{{.Document.ID}}" alt="{{.Document.Title}}" class="max-w-full rounded-lg shadow-sm" style="max-height:600px;object-fit:contain;">
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{{else if .Document.PDFPath}}
|
||||||
<div class="bg-white dark:bg-gray-800 rounded-2xl shadow-sm border border-gray-100 dark:border-gray-700 overflow-hidden">
|
<div class="bg-white dark:bg-gray-800 rounded-2xl shadow-sm border border-gray-100 dark:border-gray-700 overflow-hidden">
|
||||||
<div class="px-6 py-4 border-b border-gray-100 dark:border-gray-700 flex items-center justify-between">
|
<div class="px-6 py-4 border-b border-gray-100 dark:border-gray-700 flex items-center justify-between">
|
||||||
<h2 class="font-semibold text-gray-900 dark:text-white">Document Preview</h2>
|
<h2 class="font-semibold text-gray-900 dark:text-white">Document Preview</h2>
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue