// Package db provides SQLite-backed storage and full-text search for
// processed documents.
package db

import (
	"database/sql"
	"encoding/json"
	"fmt"
	"time"

	_ "github.com/mattn/go-sqlite3"
)

const (
	// dateLayout is the format used for the documents.date column.
	dateLayout = "2006-01-02"

	// sqliteTimestampLayout is the format produced by SQLite's
	// CURRENT_TIMESTAMP, which the schema uses as the column default for
	// created_at and updated_at.
	sqliteTimestampLayout = "2006-01-02 15:04:05"

	// documentColumns lists the columns of the documents table in the exact
	// order scanDocumentRow expects them. Naming the columns explicitly keeps
	// the scan independent of the table's physical column order.
	documentColumns = `id, filename, original_name, category, subcategory, title, date, vendor,
		amount, currency, tax_deductible, ocr_text, metadata, embedding, storage_path,
		markdown_path, page_count, file_size, mime_type, checksum, processed_at,
		created_at, updated_at`

	// qualifiedDocumentColumns is documentColumns with every column qualified
	// by the table alias "d", for use in joins.
	qualifiedDocumentColumns = `d.id, d.filename, d.original_name, d.category, d.subcategory, d.title,
		d.date, d.vendor, d.amount, d.currency, d.tax_deductible, d.ocr_text, d.metadata,
		d.embedding, d.storage_path, d.markdown_path, d.page_count, d.file_size,
		d.mime_type, d.checksum, d.processed_at, d.created_at, d.updated_at`
)

// DB wraps *sql.DB with document-specific queries.
type DB struct {
	*sql.DB
}

// Document is one row of the documents table.
//
// Date is stored with day precision (YYYY-MM-DD); ProcessedAt, CreatedAt and
// UpdatedAt are stored as RFC 3339 strings. Columns that are NULL in the
// database are read back as the zero value of the corresponding field.
type Document struct {
	ID            string          `json:"id"`
	Filename      string          `json:"filename"`
	OriginalName  string          `json:"original_name"`
	Category      string          `json:"category"`
	Subcategory   string          `json:"subcategory,omitempty"`
	Title         string          `json:"title"`
	Date          *time.Time      `json:"date,omitempty"`
	Vendor        string          `json:"vendor,omitempty"`
	Amount        *float64        `json:"amount,omitempty"`
	Currency      string          `json:"currency,omitempty"`
	TaxDeductible bool            `json:"tax_deductible"`
	OCRText       string          `json:"ocr_text"`
	Metadata      json.RawMessage `json:"metadata,omitempty"`
	Embedding     []byte          `json:"embedding,omitempty"`
	StoragePath   string          `json:"storage_path"`
	MarkdownPath  string          `json:"markdown_path"`
	PageCount     int             `json:"page_count"`
	FileSize      int64           `json:"file_size"`
	MimeType      string          `json:"mime_type"`
	Checksum      string          `json:"checksum"`
	ProcessedAt   time.Time       `json:"processed_at"`
	CreatedAt     time.Time       `json:"created_at"`
	UpdatedAt     time.Time       `json:"updated_at"`
}

// SearchResult is a Document matched by a full-text query, together with its
// bm25 score and a text snippet taken from the OCR text.
type SearchResult struct {
	Document
	Score   float64 `json:"score"`
	Snippet string  `json:"snippet"`
}

// rowScanner is the Scan method shared by *sql.Row and *sql.Rows, so a single
// helper can decode both.
type rowScanner interface {
	Scan(dest ...interface{}) error
}

// Open opens the SQLite database at path with WAL journaling and a 5 second
// busy timeout, and verifies the connection with a ping.
func Open(path string) (*DB, error) {
	conn, err := sql.Open("sqlite3", path+"?_journal_mode=WAL&_busy_timeout=5000")
	if err != nil {
		return nil, err
	}
	if err := conn.Ping(); err != nil {
		// Release the handle; the ping failure is the error worth reporting,
		// so a secondary Close error is intentionally dropped.
		_ = conn.Close()
		return nil, err
	}
	return &DB{conn}, nil
}

// Init creates the documents table, its indexes, the FTS5 index with the
// triggers that keep it in sync, and the settings table. Tables and indexes
// are only created when missing, so Init is safe to call on every start.
func (db *DB) Init() error {
	// documents_fts is an external-content FTS5 table, so every index entry
	// must carry the rowid of the documents row it describes: the 'delete'
	// command locates the entry to remove by rowid. The triggers are dropped
	// and recreated so that databases initialised with earlier trigger
	// definitions pick up the current ones.
	// NOTE(review): an index populated by triggers that did not pass rowid may
	// hold stale entries; running
	//   INSERT INTO documents_fts(documents_fts) VALUES('rebuild');
	// once repairs it.
	const schema = `
	CREATE TABLE IF NOT EXISTS documents (
		id TEXT PRIMARY KEY,
		filename TEXT NOT NULL,
		original_name TEXT NOT NULL,
		category TEXT NOT NULL DEFAULT 'uncategorized',
		subcategory TEXT,
		title TEXT NOT NULL,
		date TEXT,
		vendor TEXT,
		amount REAL,
		currency TEXT DEFAULT 'USD',
		tax_deductible INTEGER DEFAULT 0,
		ocr_text TEXT,
		metadata TEXT,
		embedding BLOB,
		storage_path TEXT NOT NULL,
		markdown_path TEXT,
		page_count INTEGER DEFAULT 1,
		file_size INTEGER,
		mime_type TEXT,
		checksum TEXT,
		processed_at TEXT,
		created_at TEXT DEFAULT CURRENT_TIMESTAMP,
		updated_at TEXT DEFAULT CURRENT_TIMESTAMP
	);

	CREATE INDEX IF NOT EXISTS idx_documents_category ON documents(category);
	CREATE INDEX IF NOT EXISTS idx_documents_date ON documents(date);
	CREATE INDEX IF NOT EXISTS idx_documents_vendor ON documents(vendor);
	CREATE INDEX IF NOT EXISTS idx_documents_amount ON documents(amount);

	CREATE VIRTUAL TABLE IF NOT EXISTS documents_fts USING fts5(
		id, title, ocr_text, vendor, category,
		content='documents',
		content_rowid='rowid'
	);

	DROP TRIGGER IF EXISTS documents_ai;
	CREATE TRIGGER documents_ai AFTER INSERT ON documents BEGIN
		INSERT INTO documents_fts(rowid, id, title, ocr_text, vendor, category)
		VALUES (new.rowid, new.id, new.title, new.ocr_text, new.vendor, new.category);
	END;

	DROP TRIGGER IF EXISTS documents_ad;
	CREATE TRIGGER documents_ad AFTER DELETE ON documents BEGIN
		INSERT INTO documents_fts(documents_fts, rowid, id, title, ocr_text, vendor, category)
		VALUES ('delete', old.rowid, old.id, old.title, old.ocr_text, old.vendor, old.category);
	END;

	DROP TRIGGER IF EXISTS documents_au;
	CREATE TRIGGER documents_au AFTER UPDATE ON documents BEGIN
		INSERT INTO documents_fts(documents_fts, rowid, id, title, ocr_text, vendor, category)
		VALUES ('delete', old.rowid, old.id, old.title, old.ocr_text, old.vendor, old.category);
		INSERT INTO documents_fts(rowid, id, title, ocr_text, vendor, category)
		VALUES (new.rowid, new.id, new.title, new.ocr_text, new.vendor, new.category);
	END;

	CREATE TABLE IF NOT EXISTS settings (
		key TEXT PRIMARY KEY,
		value TEXT
	);
	`
	_, err := db.Exec(schema)
	return err
}

// InsertDocument stores doc as a new row. A nil doc.Date is stored as NULL;
// the three timestamps are stored as RFC 3339 strings. The driver error is
// returned unwrapped so callers can inspect it (for example for a primary-key
// conflict).
func (db *DB) InsertDocument(doc *Document) error {
	_, err := db.Exec(`
		INSERT INTO documents (`+documentColumns+`)
		VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
	`,
		doc.ID, doc.Filename, doc.OriginalName, doc.Category, doc.Subcategory, doc.Title,
		nullableDate(doc.Date), doc.Vendor, doc.Amount, doc.Currency, doc.TaxDeductible,
		doc.OCRText, doc.Metadata, doc.Embedding, doc.StoragePath, doc.MarkdownPath,
		doc.PageCount, doc.FileSize, doc.MimeType, doc.Checksum,
		doc.ProcessedAt.Format(time.RFC3339),
		doc.CreatedAt.Format(time.RFC3339),
		doc.UpdatedAt.Format(time.RFC3339),
	)
	return err
}

// UpdateDocument overwrites the editable fields (category, subcategory,
// title, date, vendor, amount, currency, tax_deductible, metadata) of the row
// whose id is doc.ID and sets updated_at to the current time. It does not
// report whether a row with that id existed.
func (db *DB) UpdateDocument(doc *Document) error {
	_, err := db.Exec(`
		UPDATE documents SET
			category = ?, subcategory = ?, title = ?, date = ?, vendor = ?,
			amount = ?, currency = ?, tax_deductible = ?, metadata = ?, updated_at = ?
		WHERE id = ?
	`,
		doc.Category, doc.Subcategory, doc.Title, nullableDate(doc.Date), doc.Vendor,
		doc.Amount, doc.Currency, doc.TaxDeductible, doc.Metadata,
		time.Now().Format(time.RFC3339), doc.ID,
	)
	return err
}

// GetDocument returns the document with the given id. When no such row
// exists the error is sql.ErrNoRows, returned unwrapped.
func (db *DB) GetDocument(id string) (*Document, error) {
	row := db.QueryRow(`SELECT `+documentColumns+` FROM documents WHERE id = ?`, id)
	return scanDocument(row)
}

// DeleteDocument removes the document with the given id. Deleting an id that
// does not exist is not an error.
func (db *DB) DeleteDocument(id string) error {
	_, err := db.Exec(`DELETE FROM documents WHERE id = ?`, id)
	return err
}

// ListDocuments returns one page of documents, newest first by document date
// (falling back to created_at when the date is NULL). An empty category lists
// documents of every category.
func (db *DB) ListDocuments(category string, limit, offset int) ([]*Document, error) {
	query := `SELECT ` + documentColumns + ` FROM documents`
	var args []interface{}
	if category != "" {
		query += ` WHERE category = ?`
		args = append(args, category)
	}
	query += ` ORDER BY COALESCE(date, created_at) DESC LIMIT ? OFFSET ?`
	args = append(args, limit, offset)

	rows, err := db.Query(query, args...)
	if err != nil {
		return nil, err
	}
	defer rows.Close()
	return scanDocuments(rows)
}

// RecentDocuments returns up to limit documents ordered by created_at,
// newest first.
func (db *DB) RecentDocuments(limit int) ([]*Document, error) {
	rows, err := db.Query(`
		SELECT `+documentColumns+` FROM documents
		ORDER BY created_at DESC
		LIMIT ?
	`, limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()
	return scanDocuments(rows)
}

// SearchFTS runs an FTS5 MATCH query and returns up to limit results ordered
// by bm25 rank (best match first). query is passed to FTS5 as-is, so it must
// be valid FTS5 query syntax. The snippet is taken from the ocr_text column.
func (db *DB) SearchFTS(query string, limit int) ([]*SearchResult, error) {
	// The index is an external-content table over documents, so an index row
	// and its document share a rowid.
	rows, err := db.Query(`
		SELECT `+qualifiedDocumentColumns+`,
			bm25(documents_fts) as score,
			snippet(documents_fts, 2, '', '', '...', 32) as snippet
		FROM documents_fts f
		JOIN documents d ON d.rowid = f.rowid
		WHERE documents_fts MATCH ?
		ORDER BY bm25(documents_fts)
		LIMIT ?
	`, query, limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var results []*SearchResult
	for rows.Next() {
		var (
			score   float64
			snippet sql.NullString // NULL-safe in case the indexed text is NULL
		)
		doc, err := scanDocumentRow(rows, &score, &snippet)
		if err != nil {
			return nil, err
		}
		results = append(results, &SearchResult{
			Document: *doc,
			Score:    score,
			Snippet:  snippet.String,
		})
	}
	// Next returning false can mean an iteration error rather than the end of
	// the result set.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return results, nil
}

// GetStats returns aggregate counters keyed as follows:
//
//	"total"       int            number of documents
//	"this_month"  int            documents with created_at on or after the start of the current month
//	"total_size"  int64          sum of file_size over all documents
//	"by_category" map[string]int document count per category
//
// Any query failure is returned as an error instead of being reported as a
// zero counter.
func (db *DB) GetStats() (map[string]interface{}, error) {
	stats := make(map[string]interface{})

	// Total documents
	var total int
	if err := db.QueryRow(`SELECT COUNT(*) FROM documents`).Scan(&total); err != nil {
		return nil, fmt.Errorf("counting documents: %w", err)
	}
	stats["total"] = total

	// This month
	var thisMonth int
	err := db.QueryRow(
		`SELECT COUNT(*) FROM documents WHERE created_at >= date('now', 'start of month')`,
	).Scan(&thisMonth)
	if err != nil {
		return nil, fmt.Errorf("counting documents of this month: %w", err)
	}
	stats["this_month"] = thisMonth

	// Total size
	var totalSize int64
	err = db.QueryRow(`SELECT COALESCE(SUM(file_size), 0) FROM documents`).Scan(&totalSize)
	if err != nil {
		return nil, fmt.Errorf("summing document sizes: %w", err)
	}
	stats["total_size"] = totalSize

	// By category
	rows, err := db.Query(`SELECT category, COUNT(*) FROM documents GROUP BY category`)
	if err != nil {
		return nil, fmt.Errorf("counting documents by category: %w", err)
	}
	defer rows.Close()

	categories := make(map[string]int)
	for rows.Next() {
		var (
			cat   string
			count int
		)
		if err := rows.Scan(&cat, &count); err != nil {
			return nil, fmt.Errorf("reading category count: %w", err)
		}
		categories[cat] = count
	}
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("reading category counts: %w", err)
	}
	stats["by_category"] = categories

	return stats, nil
}

// GetExpenses returns documents in the 'expenses' category that have an
// amount, newest first by date. A year or month greater than zero restricts
// the result to that calendar year or month; zero or negative values apply no
// restriction.
func (db *DB) GetExpenses(year int, month int) ([]*Document, error) {
	query := `
		SELECT ` + documentColumns + ` FROM documents
		WHERE category = 'expenses' AND amount IS NOT NULL
	`
	args := []interface{}{}

	if year > 0 {
		query += ` AND strftime('%Y', date) = ?`
		args = append(args, fmt.Sprintf("%04d", year))
	}
	if month > 0 {
		query += ` AND strftime('%m', date) = ?`
		args = append(args, fmt.Sprintf("%02d", month))
	}
	query += ` ORDER BY date DESC`

	rows, err := db.Query(query, args...)
	if err != nil {
		return nil, err
	}
	defer rows.Close()
	return scanDocuments(rows)
}

// nullableDate converts an optional date into a bind argument: nil becomes
// SQL NULL, anything else a YYYY-MM-DD string.
func nullableDate(t *time.Time) interface{} {
	if t == nil {
		return nil
	}
	return t.Format(dateLayout)
}

// parseTimestamp parses a stored timestamp. It accepts RFC 3339 (what
// InsertDocument and UpdateDocument write) as well as the
// "YYYY-MM-DD HH:MM:SS" form produced by the CURRENT_TIMESTAMP column
// defaults. ok is false when s matches neither layout.
func parseTimestamp(s string) (t time.Time, ok bool) {
	for _, layout := range [...]string{time.RFC3339, sqliteTimestampLayout} {
		if parsed, err := time.Parse(layout, s); err == nil {
			return parsed, true
		}
	}
	return time.Time{}, false
}

// scanDocumentRow decodes one row whose leading columns are documentColumns
// (in that order) into a Document. Destinations for any additional trailing
// columns of the query are passed in extra.
//
// Nullable columns are read through NULL-aware intermediates and map to the
// zero value of their field, so a NULL never fails the scan.
func scanDocumentRow(s rowScanner, extra ...interface{}) (*Document, error) {
	var (
		doc Document

		subcategory, vendor, currency, ocrText  sql.NullString
		markdownPath, mimeType, checksum        sql.NullString
		date, processedAt, createdAt, updatedAt sql.NullString
		taxDeductible                           sql.NullBool
		pageCount, fileSize                     sql.NullInt64
		metadata                                []byte
	)

	dest := []interface{}{
		&doc.ID, &doc.Filename, &doc.OriginalName, &doc.Category, &subcategory, &doc.Title,
		&date, &vendor, &doc.Amount, &currency, &taxDeductible, &ocrText, &metadata,
		&doc.Embedding, &doc.StoragePath, &markdownPath, &pageCount, &fileSize,
		&mimeType, &checksum, &processedAt, &createdAt, &updatedAt,
	}
	dest = append(dest, extra...)

	if err := s.Scan(dest...); err != nil {
		return nil, err
	}

	// The Null* zero values equal the field zero values, so the contents can
	// be copied without checking Valid.
	doc.Subcategory = subcategory.String
	doc.Vendor = vendor.String
	doc.Currency = currency.String
	doc.TaxDeductible = taxDeductible.Bool
	doc.OCRText = ocrText.String
	doc.MarkdownPath = markdownPath.String
	doc.PageCount = int(pageCount.Int64)
	doc.FileSize = fileSize.Int64
	doc.MimeType = mimeType.String
	doc.Checksum = checksum.String
	if len(metadata) > 0 {
		doc.Metadata = json.RawMessage(metadata)
	}

	// Unparsable dates and timestamps are deliberately tolerated: the field
	// keeps its zero value (nil for Date) so that one malformed row does not
	// make a whole listing fail.
	if date.Valid {
		if t, err := time.Parse(dateLayout, date.String); err == nil {
			doc.Date = &t
		}
	}
	if t, ok := parseTimestamp(processedAt.String); ok {
		doc.ProcessedAt = t
	}
	if t, ok := parseTimestamp(createdAt.String); ok {
		doc.CreatedAt = t
	}
	if t, ok := parseTimestamp(updatedAt.String); ok {
		doc.UpdatedAt = t
	}

	return &doc, nil
}

// scanDocument decodes the single row held by row. It returns sql.ErrNoRows,
// unwrapped, when the query matched nothing.
func scanDocument(row *sql.Row) (*Document, error) {
	return scanDocumentRow(row)
}

// scanDocuments decodes every remaining row of rows. The caller remains
// responsible for closing rows.
func scanDocuments(rows *sql.Rows) ([]*Document, error) {
	var docs []*Document
	for rows.Next() {
		doc, err := scanDocumentRow(rows)
		if err != nil {
			return nil, err
		}
		docs = append(docs, doc)
	}
	// Distinguish "no more rows" from an error that ended iteration early.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return docs, nil
}