642 lines
17 KiB
Go
642 lines
17 KiB
Go
package main
|
|
|
|
import (
|
|
"encoding/base64"
|
|
"encoding/csv"
|
|
"encoding/json"
|
|
"fmt"
|
|
"html/template"
|
|
"io"
|
|
"log"
|
|
"net/http"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/go-chi/chi/v5"
|
|
"github.com/go-chi/chi/v5/middleware"
|
|
)
|
|
|
|
// Package-level state shared by the handlers in this file.
var (
	// tmplFuncs holds the helper functions made available to the HTML
	// templates. It is assigned in main and read by the render helpers.
	tmplFuncs template.FuncMap

	// documentsDir is the data root. The remaining directories are derived
	// from it by initPaths.
	documentsDir string

	recordsDir, storeDir, indexDir, inboxDir string
)
|
|
|
|
func initPaths() {
|
|
documentsDir = os.Getenv("DOCSYS_DATA_DIR")
|
|
if documentsDir == "" {
|
|
documentsDir = "/srv/docsys"
|
|
}
|
|
recordsDir = filepath.Join(documentsDir, "records")
|
|
storeDir = filepath.Join(documentsDir, "store")
|
|
indexDir = filepath.Join(documentsDir, "index")
|
|
inboxDir = filepath.Join(documentsDir, "inbox")
|
|
}
|
|
|
|
// categories lists the document categories handed to the templates, in the
// order they are passed along.
var categories = []string{
	"taxes",
	"bills",
	"medical",
	"insurance",
	"legal",
	"financial",
	"expenses",
	"vehicles",
	"home",
	"personal",
	"contacts",
	"uncategorized",
}
|
|
|
|
func main() {
|
|
initPaths()
|
|
|
|
// Initialize database
|
|
dbPath := filepath.Join(indexDir, "docsys.db")
|
|
if err := InitDB(dbPath); err != nil {
|
|
log.Fatalf("Failed to initialize database: %v", err)
|
|
}
|
|
defer CloseDB()
|
|
|
|
// Note: Markdown record indexing disabled - we now store directly in DB
|
|
// if err := IndexDocumentsFromDirectory(recordsDir, storeDir, categories); err != nil {
|
|
// log.Printf("Warning: Failed to index documents: %v", err)
|
|
// }
|
|
|
|
// Ensure inbox directory exists
|
|
os.MkdirAll(inboxDir, 0755)
|
|
|
|
// Template functions
|
|
tmplFuncs = template.FuncMap{
|
|
"truncate": truncateText,
|
|
"categoryIcon": categoryIcon,
|
|
"formatDate": formatDate,
|
|
"lower": strings.ToLower,
|
|
"title": strings.Title,
|
|
"safe": func(s string) template.HTML { return template.HTML(s) },
|
|
"multiply": func(a float64, b float64) float64 { return a * b },
|
|
}
|
|
|
|
r := chi.NewRouter()
|
|
r.Use(middleware.Logger)
|
|
r.Use(middleware.Recoverer)
|
|
r.Use(middleware.Compress(5))
|
|
|
|
// Static files
|
|
r.Handle("/static/*", http.StripPrefix("/static/", http.FileServer(http.Dir("static"))))
|
|
|
|
// PDF serving
|
|
r.Get("/pdf/{hash}", servePDF)
|
|
|
|
// Pages
|
|
r.Get("/", dashboardHandler)
|
|
r.Get("/browse", browseHandler)
|
|
r.Get("/browse/{category}", browseCategoryHandler)
|
|
r.Get("/document/{id}", documentHandler)
|
|
r.Get("/search", searchHandler)
|
|
|
|
// API endpoints
|
|
r.Post("/api/search", apiSearchHandler)
|
|
r.Get("/api/documents", apiDocumentsHandler)
|
|
r.Get("/api/processing", apiProcessingHandler)
|
|
r.Post("/api/upload", uploadHandler)
|
|
r.Post("/api/ingest", ingestHandler)
|
|
r.Put("/api/document/{id}", updateDocumentHandler)
|
|
r.Delete("/api/document/{id}", deleteDocumentHandler)
|
|
r.Get("/api/export", exportCSVHandler)
|
|
r.Post("/api/share/{id}", createShareHandler)
|
|
r.Delete("/api/share/{token}", deleteShareHandler)
|
|
r.Get("/api/shares/{id}", listSharesHandler)
|
|
r.Get("/s/{token}", publicShareHandler)
|
|
r.Post("/api/reindex", reindexHandler)
|
|
r.Get("/api/debug/stats", debugStatsHandler)
|
|
|
|
// Watch inbox directory for new files (scanner via SFTP, web upload, etc.)
|
|
StartInboxWatcher()
|
|
|
|
port := ":9201"
|
|
log.Printf("🗂️ DocSys starting on http://localhost%s", port)
|
|
log.Printf("📁 Documents: %s", documentsDir)
|
|
log.Fatal(http.ListenAndServe(port, r))
|
|
}
|
|
|
|
// Template helpers
|
|
|
|
// truncateText shortens s to at most n characters and appends "..." when
// anything was cut off. Strings that already fit are returned unchanged.
//
// The limit is counted in runes, so a multi-byte UTF-8 character is never
// split in the middle. A negative n is treated as 0 instead of panicking.
func truncateText(s string, n int) string {
	if n < 0 {
		n = 0
	}
	// The byte length is an upper bound on the rune count, so this cheap
	// check settles every string that trivially fits.
	if len(s) <= n {
		return s
	}

	seen := 0
	for i := range s {
		if seen == n {
			// i is the byte offset at which rune number n+1 starts.
			return s[:i] + "..."
		}
		seen++
	}
	// Fewer than n+1 runes in total: nothing to cut.
	return s
}
|
|
|
|
// categoryIcon returns the emoji used to represent the category cat.
// Unknown categories get the generic document icon.
//
// A switch is used instead of a map literal so that no map is built on every
// call (this helper runs once per rendered document).
func categoryIcon(cat string) string {
	switch cat {
	case "taxes":
		return "📋"
	case "bills":
		return "💰"
	case "medical":
		return "🏥"
	case "insurance":
		return "🛡️"
	case "legal":
		return "⚖️"
	case "financial":
		return "🏦"
	case "expenses":
		return "💳"
	case "vehicles":
		return "🚗"
	case "home":
		return "🏠"
	case "personal":
		return "👤"
	case "contacts":
		return "📇"
	case "uncategorized":
		return "📁"
	default:
		return "📄"
	}
}
|
|
|
|
// formatDate renders a date string in "Jan 2, 2006" form. It tries a fixed
// list of input layouts in order and returns s unchanged when none matches.
//
// Month names are matched case-insensitively by time.Parse, so the
// "January 2, 2006" layout also accepts "january 5, 2024". The former
// lower-case "january 2, 2006" layout was removed: lower-case "january" is not
// a layout token, so it only ever matched the literal text "january", which
// the capitalised layout already handles.
func formatDate(s string) string {
	layouts := []string{
		"2006-01-02T15:04:05.999999",
		"2006-01-02T15:04:05",
		"2006-01-02",
		"January 2, 2006",
		// Timestamps carrying a zone ("Z" or "+02:00").
		time.RFC3339Nano,
		// Space-separated timestamps.
		"2006-01-02 15:04:05",
	}
	for _, layout := range layouts {
		if t, err := time.Parse(layout, s); err == nil {
			return t.Format("Jan 2, 2006")
		}
	}
	return s
}
|
|
|
|
// Template rendering
|
|
|
|
func renderTemplate(w http.ResponseWriter, name string, data interface{}) {
|
|
tmpl := template.Must(template.New("").Funcs(tmplFuncs).ParseFiles(
|
|
"templates/base.html",
|
|
"templates/"+name+".html",
|
|
))
|
|
if err := tmpl.ExecuteTemplate(w, "base", data); err != nil {
|
|
log.Printf("Template error: %v", err)
|
|
http.Error(w, "Template error", http.StatusInternalServerError)
|
|
}
|
|
}
|
|
|
|
func renderPartial(w http.ResponseWriter, name string, data interface{}) {
|
|
tmpl := template.Must(template.New("").Funcs(tmplFuncs).ParseFiles(
|
|
"templates/partials/" + name + ".html",
|
|
))
|
|
if err := tmpl.ExecuteTemplate(w, "partials/"+name+".html", data); err != nil {
|
|
log.Printf("Template error: %v", err)
|
|
http.Error(w, "Template error", http.StatusInternalServerError)
|
|
}
|
|
}
|
|
|
|
// Page handlers
|
|
|
|
func dashboardHandler(w http.ResponseWriter, r *http.Request) {
|
|
stats, _ := GetStats()
|
|
renderTemplate(w, "dashboard", map[string]interface{}{
|
|
"Title": "Dashboard",
|
|
"Stats": stats,
|
|
"Categories": categories,
|
|
})
|
|
}
|
|
|
|
func browseHandler(w http.ResponseWriter, r *http.Request) {
|
|
renderTemplate(w, "browse", map[string]interface{}{
|
|
"Title": "Browse Documents",
|
|
"Categories": categories,
|
|
"CatStats": GetCategoryStats(categories),
|
|
})
|
|
}
|
|
|
|
func browseCategoryHandler(w http.ResponseWriter, r *http.Request) {
|
|
category := chi.URLParam(r, "category")
|
|
docs, _ := GetDocumentsByCategory(category)
|
|
renderTemplate(w, "category", map[string]interface{}{
|
|
"Title": strings.Title(category),
|
|
"Category": category,
|
|
"Documents": docs,
|
|
})
|
|
}
|
|
|
|
func documentHandler(w http.ResponseWriter, r *http.Request) {
|
|
id := chi.URLParam(r, "id")
|
|
doc, err := GetDocument(id)
|
|
if err != nil {
|
|
http.Error(w, "Document not found", http.StatusNotFound)
|
|
return
|
|
}
|
|
renderTemplate(w, "document", map[string]interface{}{
|
|
"Title": doc.Title,
|
|
"Document": doc,
|
|
"Categories": categories,
|
|
})
|
|
}
|
|
|
|
func searchHandler(w http.ResponseWriter, r *http.Request) {
|
|
query := r.URL.Query().Get("q")
|
|
var docs []Document
|
|
if query != "" {
|
|
// Try FTS first
|
|
docs, _ = SearchDocuments(query, 50)
|
|
// If no keyword results, try semantic search
|
|
if len(docs) == 0 {
|
|
if emb, err := GenerateEmbedding(query); err == nil {
|
|
docs, _ = SemanticSearch(emb, 10)
|
|
}
|
|
}
|
|
}
|
|
renderTemplate(w, "search", map[string]interface{}{
|
|
"Title": "Search",
|
|
"Query": query,
|
|
"Documents": docs,
|
|
})
|
|
}
|
|
|
|
func servePDF(w http.ResponseWriter, r *http.Request) {
|
|
hash := chi.URLParam(r, "hash")
|
|
|
|
// Try PDF first, then TXT
|
|
for _, ext := range []string{".pdf", ".txt"} {
|
|
path := filepath.Join(storeDir, hash+ext)
|
|
if _, err := os.Stat(path); err == nil {
|
|
if ext == ".pdf" {
|
|
w.Header().Set("Content-Type", "application/pdf")
|
|
} else {
|
|
w.Header().Set("Content-Type", "text/plain")
|
|
}
|
|
http.ServeFile(w, r, path)
|
|
return
|
|
}
|
|
}
|
|
|
|
// Try without extension
|
|
path := filepath.Join(storeDir, hash)
|
|
if _, err := os.Stat(path); err == nil {
|
|
http.ServeFile(w, r, path)
|
|
return
|
|
}
|
|
|
|
http.Error(w, "File not found", http.StatusNotFound)
|
|
}
|
|
|
|
// API handlers
|
|
|
|
func apiSearchHandler(w http.ResponseWriter, r *http.Request) {
|
|
query := r.FormValue("q")
|
|
if query == "" {
|
|
w.Write([]byte(""))
|
|
return
|
|
}
|
|
|
|
docs, err := SearchDocuments(query, 50)
|
|
if err != nil {
|
|
// Fallback to simple search
|
|
docs, _ = SearchDocumentsFallback(query, 50)
|
|
}
|
|
|
|
renderPartial(w, "document-list", docs)
|
|
}
|
|
|
|
func apiProcessingHandler(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
json.NewEncoder(w).Encode(GetActiveJobs())
|
|
}
|
|
|
|
func apiDocumentsHandler(w http.ResponseWriter, r *http.Request) {
|
|
category := r.URL.Query().Get("category")
|
|
var docs []Document
|
|
if category != "" {
|
|
docs, _ = GetDocumentsByCategory(category)
|
|
} else {
|
|
docs, _ = GetAllDocuments()
|
|
}
|
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
json.NewEncoder(w).Encode(docs)
|
|
}
|
|
|
|
func uploadHandler(w http.ResponseWriter, r *http.Request) {
|
|
r.ParseMultipartForm(32 << 20) // 32MB max
|
|
|
|
file, header, err := r.FormFile("file")
|
|
if err != nil {
|
|
http.Error(w, "Failed to read file", http.StatusBadRequest)
|
|
return
|
|
}
|
|
defer file.Close()
|
|
|
|
// Save to inbox
|
|
filename := fmt.Sprintf("%d_%s", time.Now().Unix(), header.Filename)
|
|
destPath := filepath.Join(inboxDir, filename)
|
|
|
|
dest, err := os.Create(destPath)
|
|
if err != nil {
|
|
http.Error(w, "Failed to save file", http.StatusInternalServerError)
|
|
return
|
|
}
|
|
defer dest.Close()
|
|
|
|
io.Copy(dest, file)
|
|
|
|
// Check for duplicate before processing
|
|
hash, _ := FileHash(destPath)
|
|
existingDoc, _ := GetDocument(hash)
|
|
|
|
if existingDoc != nil && existingDoc.Status != "processing" {
|
|
// Document already exists — remove inbox file, return existing
|
|
os.Remove(destPath)
|
|
w.Header().Set("Content-Type", "application/json")
|
|
json.NewEncoder(w).Encode(map[string]interface{}{
|
|
"status": "duplicate",
|
|
"filename": filename,
|
|
"message": "Document already exists in your library.",
|
|
"document": map[string]string{
|
|
"id": existingDoc.ID,
|
|
"title": existingDoc.Title,
|
|
"category": existingDoc.Category,
|
|
},
|
|
})
|
|
return
|
|
}
|
|
|
|
// Create pending document immediately (shows in UI right away)
|
|
InsertPendingDocument(hash, header.Filename)
|
|
|
|
// Process document (async)
|
|
go func() {
|
|
if doc, err := ProcessDocument(destPath); err != nil {
|
|
log.Printf("Process error for %s: %v", filename, err)
|
|
UpdateDocumentStatus(hash, "error")
|
|
} else {
|
|
log.Printf("Processed: %s → %s/%s", filename, doc.Category, doc.ID)
|
|
}
|
|
}()
|
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
json.NewEncoder(w).Encode(map[string]interface{}{
|
|
"status": "success",
|
|
"filename": filename,
|
|
"id": hash,
|
|
"message": "Processing...",
|
|
})
|
|
}
|
|
|
|
// ingestHandler accepts JSON with base64-encoded file content
|
|
// POST /api/ingest
|
|
// {
|
|
// "filename": "invoice.pdf",
|
|
// "content": "<base64-encoded-data>",
|
|
// "source": "email", // optional metadata
|
|
// "subject": "Your invoice", // optional
|
|
// "from": "billing@example.com" // optional
|
|
// }
|
|
func ingestHandler(w http.ResponseWriter, r *http.Request) {
|
|
var req struct {
|
|
Filename string `json:"filename"`
|
|
Content string `json:"content"`
|
|
Source string `json:"source"`
|
|
Subject string `json:"subject"`
|
|
From string `json:"from"`
|
|
}
|
|
|
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
|
http.Error(w, "Invalid JSON", http.StatusBadRequest)
|
|
return
|
|
}
|
|
|
|
if req.Filename == "" || req.Content == "" {
|
|
http.Error(w, "filename and content are required", http.StatusBadRequest)
|
|
return
|
|
}
|
|
|
|
// Decode base64 content
|
|
data, err := base64.StdEncoding.DecodeString(req.Content)
|
|
if err != nil {
|
|
http.Error(w, "Invalid base64 content", http.StatusBadRequest)
|
|
return
|
|
}
|
|
|
|
// Sanitize filename
|
|
safeName := strings.ReplaceAll(req.Filename, "/", "_")
|
|
safeName = strings.ReplaceAll(safeName, "\\", "_")
|
|
|
|
// Generate unique filename with timestamp
|
|
filename := fmt.Sprintf("%d_%s", time.Now().Unix(), safeName)
|
|
destPath := filepath.Join(inboxDir, filename)
|
|
|
|
// Write file
|
|
if err := os.WriteFile(destPath, data, 0644); err != nil {
|
|
http.Error(w, "Failed to write file", http.StatusInternalServerError)
|
|
return
|
|
}
|
|
|
|
// Process immediately (async)
|
|
go func() {
|
|
if doc, err := ProcessDocument(destPath); err != nil {
|
|
log.Printf("Process error for %s: %v", filename, err)
|
|
} else {
|
|
// Store email metadata if provided
|
|
if req.Source != "" || req.Subject != "" || req.From != "" {
|
|
doc.Metadata = map[string]string{
|
|
"source": req.Source,
|
|
"subject": req.Subject,
|
|
"from": req.From,
|
|
}
|
|
UpdateDocumentMetadata(doc.ID, doc.Metadata)
|
|
}
|
|
log.Printf("Processed: %s → %s/%s", filename, doc.Category, doc.ID)
|
|
}
|
|
}()
|
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
json.NewEncoder(w).Encode(map[string]string{
|
|
"status": "success",
|
|
"filename": filename,
|
|
"message": "Document ingested. Processing started.",
|
|
})
|
|
}
|
|
|
|
func updateDocumentHandler(w http.ResponseWriter, r *http.Request) {
|
|
id := chi.URLParam(r, "id")
|
|
|
|
var update struct {
|
|
Title string `json:"title"`
|
|
Category string `json:"category"`
|
|
Notes string `json:"notes"`
|
|
}
|
|
|
|
if err := json.NewDecoder(r.Body).Decode(&update); err != nil {
|
|
http.Error(w, "Invalid request", http.StatusBadRequest)
|
|
return
|
|
}
|
|
|
|
// Get current document to check if category changed
|
|
doc, err := GetDocument(id)
|
|
if err != nil {
|
|
http.Error(w, "Document not found", http.StatusNotFound)
|
|
return
|
|
}
|
|
|
|
// Update in database
|
|
if err := UpdateDocument(id, DocumentUpdate{
|
|
Title: update.Title,
|
|
Category: update.Category,
|
|
Notes: update.Notes,
|
|
}); err != nil {
|
|
http.Error(w, "Failed to update", http.StatusInternalServerError)
|
|
return
|
|
}
|
|
|
|
// Move file if category changed
|
|
if doc.Category != update.Category && doc.RecordPath != "" {
|
|
newDir := filepath.Join(recordsDir, update.Category)
|
|
os.MkdirAll(newDir, 0755)
|
|
newPath := filepath.Join(newDir, filepath.Base(doc.RecordPath))
|
|
if err := os.Rename(doc.RecordPath, newPath); err == nil {
|
|
UpdateDocumentRecordPath(id, newPath)
|
|
}
|
|
}
|
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
json.NewEncoder(w).Encode(map[string]string{"status": "success"})
|
|
}
|
|
|
|
func deleteDocumentHandler(w http.ResponseWriter, r *http.Request) {
|
|
id := chi.URLParam(r, "id")
|
|
|
|
doc, err := GetDocument(id)
|
|
if err != nil {
|
|
http.Error(w, "Document not found", http.StatusNotFound)
|
|
return
|
|
}
|
|
|
|
// Delete from database (includes FTS and embeddings)
|
|
DeleteDocument(id)
|
|
|
|
// Delete record file
|
|
if doc.RecordPath != "" {
|
|
os.Remove(doc.RecordPath)
|
|
}
|
|
|
|
// Delete store files (PDF/TXT)
|
|
for _, ext := range []string{".pdf", ".txt", ""} {
|
|
path := filepath.Join(storeDir, id+ext)
|
|
os.Remove(path) // ignore errors for non-existent files
|
|
}
|
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
json.NewEncoder(w).Encode(map[string]string{"status": "deleted"})
|
|
}
|
|
|
|
func exportCSVHandler(w http.ResponseWriter, r *http.Request) {
|
|
category := r.URL.Query().Get("category")
|
|
var docs []Document
|
|
if category != "" {
|
|
docs, _ = GetDocumentsByCategory(category)
|
|
} else {
|
|
docs, _ = GetAllDocuments()
|
|
}
|
|
|
|
w.Header().Set("Content-Type", "text/csv")
|
|
w.Header().Set("Content-Disposition", "attachment; filename=documents.csv")
|
|
|
|
writer := csv.NewWriter(w)
|
|
writer.Write([]string{"ID", "Title", "Category", "Type", "Date", "Amount", "Vendor", "Summary"})
|
|
|
|
for _, doc := range docs {
|
|
writer.Write([]string{
|
|
doc.ID, doc.Title, doc.Category, doc.Type,
|
|
doc.Date, doc.Amount, doc.Vendor, doc.Summary,
|
|
})
|
|
}
|
|
writer.Flush()
|
|
}
|
|
|
|
func debugStatsHandler(w http.ResponseWriter, r *http.Request) {
|
|
stats, err := GetStats()
|
|
w.Header().Set("Content-Type", "application/json")
|
|
json.NewEncoder(w).Encode(map[string]interface{}{
|
|
"error": err,
|
|
"total": stats.TotalDocs,
|
|
"recent": stats.RecentDocs,
|
|
"uploadsCount": len(stats.RecentUploads),
|
|
"recentUploads": stats.RecentUploads,
|
|
})
|
|
}
|
|
|
|
func createShareHandler(w http.ResponseWriter, r *http.Request) {
|
|
id := chi.URLParam(r, "id")
|
|
var req struct {
|
|
Days int `json:"days"`
|
|
}
|
|
req.Days = 7 // default
|
|
json.NewDecoder(r.Body).Decode(&req)
|
|
|
|
token, err := CreateShare(id, req.Days)
|
|
if err != nil {
|
|
http.Error(w, "Failed to create share", http.StatusInternalServerError)
|
|
return
|
|
}
|
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
json.NewEncoder(w).Encode(map[string]string{
|
|
"token": token,
|
|
"url": "/s/" + token + ".pdf",
|
|
})
|
|
}
|
|
|
|
func deleteShareHandler(w http.ResponseWriter, r *http.Request) {
|
|
token := chi.URLParam(r, "token")
|
|
DeleteShare(token)
|
|
w.Header().Set("Content-Type", "application/json")
|
|
json.NewEncoder(w).Encode(map[string]string{"status": "deleted"})
|
|
}
|
|
|
|
func listSharesHandler(w http.ResponseWriter, r *http.Request) {
|
|
id := chi.URLParam(r, "id")
|
|
shares, err := GetSharesByDocument(id)
|
|
if err != nil {
|
|
shares = []Share{}
|
|
}
|
|
if shares == nil {
|
|
shares = []Share{}
|
|
}
|
|
w.Header().Set("Content-Type", "application/json")
|
|
json.NewEncoder(w).Encode(shares)
|
|
}
|
|
|
|
func publicShareHandler(w http.ResponseWriter, r *http.Request) {
|
|
token := chi.URLParam(r, "token")
|
|
token = strings.TrimSuffix(token, ".pdf")
|
|
doc, err := GetShare(token)
|
|
if err != nil || doc == nil {
|
|
http.Error(w, "Not found", http.StatusNotFound)
|
|
return
|
|
}
|
|
|
|
// Serve the PDF directly
|
|
for _, ext := range []string{".pdf", ".txt", ""} {
|
|
path := filepath.Join(storeDir, doc.ID+ext)
|
|
if _, err := os.Stat(path); err == nil {
|
|
if ext == ".pdf" || ext == "" {
|
|
w.Header().Set("Content-Type", "application/pdf")
|
|
} else {
|
|
w.Header().Set("Content-Type", "text/plain")
|
|
}
|
|
// Set filename so Android/browsers handle it properly
|
|
filename := strings.ReplaceAll(doc.Title, " ", "-") + ext
|
|
if ext == "" {
|
|
filename += ".pdf"
|
|
}
|
|
w.Header().Set("Content-Disposition", fmt.Sprintf(`inline; filename="%s"`, filename))
|
|
http.ServeFile(w, r, path)
|
|
return
|
|
}
|
|
}
|
|
http.Error(w, "File not found", http.StatusNotFound)
|
|
}
|
|
|
|
// reindexHandler is a disabled placeholder for the former reindex operation.
//
// The old behavior cleared all documents and then re-indexed markdown files,
// which are no longer used, so it wiped data without repopulating it. The
// endpoint is kept so existing callers do not hit a 404, but it now says that
// nothing happened instead of claiming a successful reindex.
//
// TODO: Implement safe reprocessing that doesn't delete existing docs
func reindexHandler(w http.ResponseWriter, r *http.Request) {
	log.Printf("Reindex endpoint called but disabled (would wipe all data)")

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(map[string]string{
		"status":  "disabled",
		"message": "Reindexing is currently disabled; no documents were changed.",
	})
}
|