package main

import (
	"bytes"
	"encoding/base64"
	"encoding/csv"
	"encoding/json"
	"fmt"
	"html/template"
	"io"
	"log"
	"net/http"
	"os"
	"path/filepath"
	"strings"
	"time"
	"unicode/utf8"

	"github.com/go-chi/chi/v5"
	"github.com/go-chi/chi/v5/middleware"
)

// Package-level state shared by the handlers. The directory variables are
// populated once by initPaths; tmplFuncs is populated once in main.
var (
	tmplFuncs    template.FuncMap
	documentsDir string
	recordsDir   string
	storeDir     string
	indexDir     string
	inboxDir     string
)

// initPaths resolves the data directory layout. The root comes from the
// DOCSYS_DATA_DIR environment variable and defaults to /srv/docsys; the
// records, store, index and inbox directories are fixed sub-directories of it.
func initPaths() {
	documentsDir = os.Getenv("DOCSYS_DATA_DIR")
	if documentsDir == "" {
		documentsDir = "/srv/docsys"
	}
	recordsDir = filepath.Join(documentsDir, "records")
	storeDir = filepath.Join(documentsDir, "store")
	indexDir = filepath.Join(documentsDir, "index")
	inboxDir = filepath.Join(documentsDir, "inbox")
}

// categories is the list of document categories offered by the UI.
var categories = []string{
	"taxes", "bills", "medical", "insurance", "legal", "financial",
	"expenses", "vehicles", "home", "personal", "contacts", "inou",
	"sophia", "uncategorized",
}

// main wires up paths, the database, template helpers and the HTTP routes,
// starts the inbox watcher and then serves on port 9201 until the server
// fails.
func main() {
	initPaths()

	// Initialize database
	dbPath := filepath.Join(indexDir, "docsys.db")
	if err := InitDB(dbPath); err != nil {
		log.Fatalf("Failed to initialize database: %v", err)
	}
	// NOTE: log.Fatal below exits the process without running deferred
	// calls, so this only takes effect if main ever returns normally.
	defer CloseDB()

	// Note: Markdown record indexing disabled - we now store directly in DB
	// if err := IndexDocumentsFromDirectory(recordsDir, storeDir, categories); err != nil {
	// 	log.Printf("Warning: Failed to index documents: %v", err)
	// }

	// Ensure inbox directory exists. A failure is not fatal here, but uploads
	// and ingests will fail later, so make it visible in the log.
	if err := os.MkdirAll(inboxDir, 0755); err != nil {
		log.Printf("Warning: Failed to create inbox directory %s: %v", inboxDir, err)
	}

	// Template functions
	tmplFuncs = template.FuncMap{
		"truncate":       truncateText,
		"categoryIcon":   categoryIcon,
		"formatDate":     formatDate,
		"formatDateTime": formatDateTime,
		"lower":          strings.ToLower,
		"title":          strings.Title,
		// "safe" disables HTML escaping; only use it on trusted content.
		"safe":     func(s string) template.HTML { return template.HTML(s) },
		"multiply": func(a float64, b float64) float64 { return a * b },
		"isImage": func(filename string) bool {
			ext := strings.ToLower(filepath.Ext(filename))
			return ext == ".jpg" || ext == ".jpeg" || ext == ".png" ||
				ext == ".gif" || ext == ".webp" || ext == ".tiff"
		},
	}

	r := chi.NewRouter()
	r.Use(middleware.Logger)
	r.Use(middleware.Recoverer)
	r.Use(middleware.Compress(5))

	// Static files
	r.Handle("/static/*", http.StripPrefix("/static/", http.FileServer(http.Dir("static"))))

	// PDF serving
	r.Get("/pdf/{hash}", servePDF)
	r.Get("/img/{hash}", serveImage)

	// Pages
	r.Get("/", dashboardHandler)
	r.Get("/browse", browseHandler)
	r.Get("/browse/{category}", browseCategoryHandler)
	r.Get("/document/{id}", documentHandler)
	r.Get("/search", searchHandler)

	// API endpoints
	r.Get("/api/search", apiSearchMDHandler)
	r.Post("/api/search", apiSearchHandler)
	r.Get("/api/documents", apiDocumentsHandler)
	r.Get("/api/processing", apiProcessingHandler)
	r.Post("/api/upload", uploadHandler)
	r.Post("/api/ingest", ingestHandler)
	r.Put("/api/document/{id}", updateDocumentHandler)
	r.Delete("/api/document/{id}", deleteDocumentHandler)
	r.Get("/api/export", exportCSVHandler)
	r.Post("/api/share/{id}", createShareHandler)
	r.Delete("/api/share/{token}", deleteShareHandler)
	r.Get("/api/shares/{id}", listSharesHandler)
	r.Get("/s/{token}", publicShareHandler)
	r.Post("/api/reindex", reindexHandler)
	r.Get("/api/debug/stats", debugStatsHandler)

	// Watch inbox directory for new files (scanner via SFTP, web upload, etc.)
	StartInboxWatcher()

	port := ":9201"
	log.Printf("🗂️ DocSys starting on http://localhost%s", port)
	log.Printf("📁 Documents: %s", documentsDir)

	// Use an explicit server so that a client which never finishes sending
	// its request headers cannot hold a connection open forever. Read/write
	// timeouts are deliberately left unset because uploads and downloads can
	// be large.
	srv := &http.Server{
		Addr:              port,
		Handler:           r,
		ReadHeaderTimeout: 10 * time.Second,
	}
	log.Fatal(srv.ListenAndServe())
}

// Template helpers

// truncateText shortens s to at most n bytes and appends "..." when anything
// was cut off. The cut is moved back to the nearest rune boundary so that a
// multi-byte UTF-8 character is never split; a negative n is treated as 0.
func truncateText(s string, n int) string {
	if n < 0 {
		n = 0
	}
	if len(s) <= n {
		return s
	}
	// n < len(s) here, so s[n] is in range. Back up while s[n] is a UTF-8
	// continuation byte.
	for n > 0 && !utf8.RuneStart(s[n]) {
		n--
	}
	return s[:n] + "..."
}

// categoryIcon returns the emoji used for a category, or a generic document
// icon for categories that have no dedicated entry.
func categoryIcon(cat string) string {
	icons := map[string]string{
		"taxes":         "📋",
		"bills":         "💰",
		"medical":       "🏥",
		"insurance":     "🛡️",
		"legal":         "⚖️",
		"financial":     "🏦",
		"expenses":      "💳",
		"vehicles":      "🚗",
		"home":          "🏠",
		"personal":      "👤",
		"contacts":      "📇",
		"inou":          "🏢",
		"sophia":        "👧",
		"uncategorized": "📁",
	}
	if icon, ok := icons[cat]; ok {
		return icon
	}
	return "📄"
}

// formatDate renders a date string as "Jan 2, 2006". It tries each known
// input layout in order and returns s unchanged when none of them matches.
func formatDate(s string) string {
	formats := []string{
		"2006-01-02T15:04:05.999999",
		"2006-01-02T15:04:05",
		"2006-01-02",
		"January 2, 2006",
		"january 2, 2006",
	}
	for _, f := range formats {
		if t, err := time.Parse(f, s); err == nil {
			return t.Format("Jan 2, 2006")
		}
	}
	return s
}

// formatDateTime renders a timestamp string in the America/New_York zone
// (UTC if that zone cannot be loaded) as "Jan 02, 2006 3:04 PM MST". It
// returns s unchanged when no known layout matches.
func formatDateTime(s string) string {
	formats := []string{
		"2006-01-02T15:04:05-07:00",
		"2006-01-02T15:04:05.999999-07:00",
		"2006-01-02T15:04:05.999999",
		"2006-01-02T15:04:05",
		"2006-01-02 15:04:05",
		"2006-01-02",
	}
	loc, _ := time.LoadLocation("America/New_York")
	if loc == nil {
		loc = time.UTC
	}
	for _, f := range formats {
		if t, err := time.Parse(f, s); err == nil {
			// Layouts without an offset are parsed as UTC by time.Parse and
			// then converted to the display zone.
			t = t.In(loc)
			return t.Format("Jan 02, 2006 3:04 PM MST")
		}
	}
	return s
}

// Template rendering

// renderTemplate parses templates/base.html together with
// templates/<name>.html and writes the "base" template to w.
//
// Parse and execution failures are logged and answered with a 500 instead of
// panicking. The output is rendered into a buffer first so that a failure
// halfway through does not leave a partially written page followed by an
// error message.
func renderTemplate(w http.ResponseWriter, name string, data interface{}) {
	tmpl, err := template.New("").Funcs(tmplFuncs).ParseFiles(
		"templates/base.html",
		"templates/"+name+".html",
	)
	if err != nil {
		log.Printf("Template parse error (%s): %v", name, err)
		http.Error(w, "Template error", http.StatusInternalServerError)
		return
	}

	var buf bytes.Buffer
	if err := tmpl.ExecuteTemplate(&buf, "base", data); err != nil {
		log.Printf("Template error: %v", err)
		http.Error(w, "Template error", http.StatusInternalServerError)
		return
	}
	if _, err := buf.WriteTo(w); err != nil {
		log.Printf("Template write error (%s): %v", name, err)
	}
}

// renderPartial parses templates/partials/<name>.html and writes it to w
// without the base layout.
//
// The template named "partials/<name>.html" is preferred when the file
// defines it explicitly; otherwise the name that ParseFiles assigns (the
// file's base name, "<name>.html") is used. Errors are handled the same way
// as in renderTemplate.
func renderPartial(w http.ResponseWriter, name string, data interface{}) {
	tmpl, err := template.New("").Funcs(tmplFuncs).ParseFiles(
		"templates/partials/" + name + ".html",
	)
	if err != nil {
		log.Printf("Template parse error (partial %s): %v", name, err)
		http.Error(w, "Template error", http.StatusInternalServerError)
		return
	}

	target := "partials/" + name + ".html"
	if tmpl.Lookup(target) == nil {
		// ParseFiles registers each file under its base name only.
		target = name + ".html"
	}

	var buf bytes.Buffer
	if err := tmpl.ExecuteTemplate(&buf, target, data); err != nil {
		log.Printf("Template error: %v", err)
		http.Error(w, "Template error", http.StatusInternalServerError)
		return
	}
	if _, err := buf.WriteTo(w); err != nil {
		log.Printf("Template write error (partial %s): %v", name, err)
	}
}

// Page handlers

// dashboardHandler renders the dashboard with the current statistics.
func dashboardHandler(w http.ResponseWriter, r *http.Request) {
	stats, err := GetStats()
	if err != nil {
		// Still render the page; the template receives whatever GetStats
		// returned alongside the error.
		log.Printf("Dashboard: failed to load stats: %v", err)
	}
	renderTemplate(w, "dashboard", map[string]interface{}{
		"Title":      "Dashboard",
		"Stats":      stats,
		"Categories": categories,
	})
}

// browseHandler renders the category overview page.
func browseHandler(w http.ResponseWriter, r *http.Request) {
	renderTemplate(w, "browse", map[string]interface{}{
		"Title":      "Browse Documents",
		"Categories": categories,
		"CatStats":   GetCategoryStats(categories),
	})
}

// browseCategoryHandler renders the documents of the category named in the
// URL.
func browseCategoryHandler(w http.ResponseWriter, r *http.Request) {
	category := chi.URLParam(r, "category")
	docs, err := GetDocumentsByCategory(category)
	if err != nil {
		log.Printf("Browse %q: failed to load documents: %v", category, err)
	}
	renderTemplate(w, "category", map[string]interface{}{
		"Title":     strings.Title(category),
		"Category":  category,
		"Documents": docs,
	})
}

// documentHandler renders the detail page of a single document, or responds
// with 404 when the lookup fails.
func documentHandler(w http.ResponseWriter, r *http.Request) {
	id := chi.URLParam(r, "id")
	doc, err := GetDocument(id)
	if err != nil {
		http.Error(w, "Document not found", http.StatusNotFound)
		return
	}
	renderTemplate(w, "document", map[string]interface{}{
		"Title":      doc.Title,
		"Document":   doc,
		"Categories": categories,
	})
}

// searchHandler renders the search page. A non-empty query is first run
// through SearchDocuments; if that yields nothing, an embedding is generated
// and SemanticSearch is tried instead.
func searchHandler(w http.ResponseWriter, r *http.Request) {
	query := r.URL.Query().Get("q")
	var docs []Document
	if query != "" {
		// Try FTS first
		var err error
		docs, err = SearchDocuments(query, 50)
		if err != nil {
			log.Printf("Search %q: keyword search failed: %v", query, err)
		}
		// If no keyword results, try semantic search
		if len(docs) == 0 {
			if emb, err := GenerateEmbedding(query); err == nil {
				docs, err = SemanticSearch(emb, 10)
				if err != nil {
					log.Printf("Search %q: semantic search failed: %v", query, err)
				}
			}
		}
	}
	renderTemplate(w, "search", map[string]interface{}{
		"Title":     "Search",
		"Query":     query,
		"Documents": docs,
	})
}

// validStoreKey reports whether key is safe to use as a file name inside
// storeDir: non-empty, not "." or "..", and free of path separators and NUL
// bytes. It guards handlers that build a path from a URL parameter.
func validStoreKey(key string) bool {
	if key == "" || key == "." || key == ".." {
		return false
	}
	return !strings.ContainsAny(key, "/\\\x00")
}

// servePDF serves the stored file for the hash in the URL. It looks for
// <hash>.pdf, then <hash>.txt, then the bare hash. With ?download=1 the file
// is sent as an attachment named after the document title (when available);
// otherwise it is served inline.
func servePDF(w http.ResponseWriter, r *http.Request) {
	hash := chi.URLParam(r, "hash")
	if !validStoreKey(hash) {
		http.Error(w, "File not found", http.StatusNotFound)
		return
	}
	download := r.URL.Query().Get("download") == "1"

	// Try PDF first, then TXT, then bare
	for _, ext := range []string{".pdf", ".txt", ""} {
		path := filepath.Join(storeDir, hash+ext)
		fi, err := os.Stat(path)
		if err != nil || fi.IsDir() {
			continue
		}

		// Extension-less files are treated as PDFs.
		contentType := "application/pdf"
		if ext == ".txt" {
			contentType = "text/plain"
		}

		if !download {
			// Inline viewing
			w.Header().Set("Content-Type", contentType)
			http.ServeFile(w, r, path)
			return
		}

		filename := hash + ext
		if doc, err := GetDocument(hash); err == nil && doc != nil && doc.Title != "" {
			filename = sanitizeFilename(doc.Title)
			if ext != "" && !strings.HasSuffix(strings.ToLower(filename), ext) {
				filename += ext
			}
		}

		f, err := os.Open(path)
		if err != nil {
			http.Error(w, "Cannot read file", http.StatusInternalServerError)
			return
		}
		defer f.Close()

		w.Header().Set("Content-Type", contentType)
		w.Header().Set("Content-Disposition", fmt.Sprintf(`attachment; filename="%s"`, filename))
		w.Header().Set("Content-Length", fmt.Sprintf("%d", fi.Size()))
		if _, err := io.Copy(w, f); err != nil {
			// Headers are already sent; all that is left is to log.
			log.Printf("Download of %s failed: %v", hash+ext, err)
		}
		return
	}

	http.Error(w, "File not found", http.StatusNotFound)
}

// serveImage serves the stored image for the hash in the URL, probing the
// supported extensions in a fixed order and responding with 404 when none
// exists.
func serveImage(w http.ResponseWriter, r *http.Request) {
	hash := chi.URLParam(r, "hash")
	if !validStoreKey(hash) {
		http.NotFound(w, r)
		return
	}

	// A slice rather than a map so the probe order is deterministic when
	// several variants of the same hash exist.
	imageTypes := []struct{ ext, mime string }{
		{".jpg", "image/jpeg"},
		{".jpeg", "image/jpeg"},
		{".png", "image/png"},
		{".gif", "image/gif"},
		{".webp", "image/webp"},
		{".tiff", "image/tiff"},
	}
	for _, it := range imageTypes {
		path := filepath.Join(storeDir, hash+it.ext)
		if fi, err := os.Stat(path); err == nil && !fi.IsDir() {
			w.Header().Set("Content-Type", it.mime)
			w.Header().Set("Cache-Control", "private, max-age=3600")
			http.ServeFile(w, r, path)
			return
		}
	}
	http.NotFound(w, r)
}

// sanitizeFilename removes characters unsafe for use in Content-Disposition filenames.
// Double quotes become single quotes, slashes and backslashes become dashes,
// newlines become spaces and carriage returns are dropped.
func sanitizeFilename(name string) string {
	replacer := strings.NewReplacer(`"`, "'", "/", "-", "\\", "-", "\n", " ", "\r", "")
	return replacer.Replace(name)
}

// API handlers

// apiSearchHandler handles POST /api/search and responds with the
// "document-list" partial for the matching documents. An empty query yields
// an empty response body.
func apiSearchHandler(w http.ResponseWriter, r *http.Request) {
	query := r.FormValue("q")
	if query == "" {
		w.Write([]byte(""))
		return
	}
	docs, err := SearchDocuments(query, 50)
	if err != nil {
		// Fallback to simple search
		docs, err = SearchDocumentsFallback(query, 50)
		if err != nil {
			log.Printf("API search %q: fallback search failed: %v", query, err)
		}
	}
	renderPartial(w, "document-list", docs)
}

// apiSearchMDHandler handles GET /api/search?q={query}&format=md
// Returns all matching documents as concatenated plain-text markdown,
// one document per section separated by ---. Intended for AI/LLM consumption.
func apiSearchMDHandler(w http.ResponseWriter, r *http.Request) { query := r.URL.Query().Get("q") format := r.URL.Query().Get("format") // Only serve markdown format; anything else falls back to a 400 if format != "md" { http.Error(w, "format=md required", http.StatusBadRequest) return } if query == "" { http.Error(w, "q parameter required", http.StatusBadRequest) return } docs, err := SearchDocuments(query, 200) if err != nil { docs, _ = SearchDocumentsFallback(query, 200) } w.Header().Set("Content-Type", "text/markdown; charset=utf-8") if len(docs) == 0 { w.Write([]byte("No documents found matching: " + query + "\n")) return } var sb strings.Builder for i, doc := range docs { sb.WriteString("# Document: ") if doc.Title != "" { sb.WriteString(doc.Title) } else { sb.WriteString("(untitled)") } sb.WriteString("\n") sb.WriteString("ID: ") sb.WriteString(doc.ID) if doc.Category != "" { sb.WriteString(" | Category: ") sb.WriteString(doc.Category) } if doc.Date != "" { sb.WriteString(" | Date: ") sb.WriteString(doc.Date) } if doc.Vendor != "" { sb.WriteString(" | Vendor: ") sb.WriteString(doc.Vendor) } sb.WriteString("\n\n") text := doc.FullText if text == "" { text = doc.Summary } if text != "" { sb.WriteString(text) sb.WriteString("\n") } if i < len(docs)-1 { sb.WriteString("\n---\n\n") } } w.Write([]byte(sb.String())) } func apiProcessingHandler(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(GetActiveJobs()) } func apiDocumentsHandler(w http.ResponseWriter, r *http.Request) { category := r.URL.Query().Get("category") var docs []Document if category != "" { docs, _ = GetDocumentsByCategory(category) } else { docs, _ = GetAllDocuments() } w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(docs) } func uploadHandler(w http.ResponseWriter, r *http.Request) { r.ParseMultipartForm(32 << 20) // 32MB max file, header, err := r.FormFile("file") if err != nil { http.Error(w, 
"Failed to read file", http.StatusBadRequest) return } defer file.Close() // Save to inbox filename := fmt.Sprintf("%d_%s", time.Now().Unix(), header.Filename) destPath := filepath.Join(inboxDir, filename) dest, err := os.Create(destPath) if err != nil { http.Error(w, "Failed to save file", http.StatusInternalServerError) return } defer dest.Close() io.Copy(dest, file) // Check for duplicate before processing hash, _ := FileHash(destPath) existingDoc, _ := GetDocument(hash) if existingDoc != nil && existingDoc.Status != "processing" { // Document already exists — remove inbox file, return existing os.Remove(destPath) w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(map[string]interface{}{ "status": "duplicate", "filename": filename, "message": "Document already exists in your library.", "document": map[string]string{ "id": existingDoc.ID, "title": existingDoc.Title, "category": existingDoc.Category, }, }) return } // Create pending document immediately (shows in UI right away) InsertPendingDocument(hash, header.Filename) // Process document (async) go func() { if doc, err := ProcessDocument(destPath); err != nil { log.Printf("Process error for %s: %v", filename, err) UpdateDocumentStatus(hash, "error") } else { log.Printf("Processed: %s → %s/%s", filename, doc.Category, doc.ID) } }() w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(map[string]interface{}{ "status": "success", "filename": filename, "id": hash, "message": "Processing...", }) } // ingestHandler accepts JSON with base64-encoded file content // POST /api/ingest // { // "filename": "invoice.pdf", // "content": "", // "source": "email", // optional metadata // "subject": "Your invoice", // optional // "from": "billing@example.com" // optional // } func ingestHandler(w http.ResponseWriter, r *http.Request) { var req struct { Filename string `json:"filename"` Content string `json:"content"` URL string `json:"url"` Source string `json:"source"` Subject 
string `json:"subject"` From string `json:"from"` } if err := json.NewDecoder(r.Body).Decode(&req); err != nil { http.Error(w, "Invalid JSON", http.StatusBadRequest) return } var data []byte if req.URL != "" { // Fetch from URL resp, err := http.Get(req.URL) if err != nil { http.Error(w, "Failed to fetch URL: "+err.Error(), http.StatusBadGateway) return } defer resp.Body.Close() if resp.StatusCode >= 400 { http.Error(w, fmt.Sprintf("URL returned %d", resp.StatusCode), http.StatusBadGateway) return } data, err = io.ReadAll(resp.Body) if err != nil { http.Error(w, "Failed to read URL content", http.StatusInternalServerError) return } // Derive filename from URL or content-type if not provided if req.Filename == "" { ext := "" ct := resp.Header.Get("Content-Type") switch { case strings.Contains(ct, "jpeg") || strings.Contains(ct, "jpg"): ext = ".jpg" case strings.Contains(ct, "png"): ext = ".png" case strings.Contains(ct, "gif"): ext = ".gif" case strings.Contains(ct, "webp"): ext = ".webp" case strings.Contains(ct, "pdf"): ext = ".pdf" case strings.Contains(ct, "tiff"): ext = ".tiff" default: // Try to get extension from URL path urlPath := resp.Request.URL.Path if e := filepath.Ext(urlPath); e != "" { ext = e } else { ext = ".bin" } } // Use last path segment of URL as base name base := filepath.Base(resp.Request.URL.Path) if base == "." 
|| base == "/" { base = "url-import" } if filepath.Ext(base) == "" { base += ext } req.Filename = base } } else { if req.Filename == "" || req.Content == "" { http.Error(w, "filename and content required, or provide url", http.StatusBadRequest) return } // Decode base64 content var err error data, err = base64.StdEncoding.DecodeString(req.Content) if err != nil { http.Error(w, "Invalid base64 content", http.StatusBadRequest) return } } // Sanitize filename safeName := strings.ReplaceAll(req.Filename, "/", "_") safeName = strings.ReplaceAll(safeName, "\\", "_") // Generate unique filename with timestamp filename := fmt.Sprintf("%d_%s", time.Now().Unix(), safeName) destPath := filepath.Join(inboxDir, filename) // Write file if err := os.WriteFile(destPath, data, 0644); err != nil { http.Error(w, "Failed to write file", http.StatusInternalServerError) return } // Process immediately (async) go func() { if doc, err := ProcessDocument(destPath); err != nil { log.Printf("Process error for %s: %v", filename, err) } else { // Store email metadata if provided if req.Source != "" || req.Subject != "" || req.From != "" { doc.Metadata = map[string]string{ "source": req.Source, "subject": req.Subject, "from": req.From, } UpdateDocumentMetadata(doc.ID, doc.Metadata) } log.Printf("Processed: %s → %s/%s", filename, doc.Category, doc.ID) } }() w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(map[string]string{ "status": "success", "filename": filename, "message": "Document ingested. 
Processing started.", }) } func updateDocumentHandler(w http.ResponseWriter, r *http.Request) { id := chi.URLParam(r, "id") var update struct { Title string `json:"title"` Category string `json:"category"` Notes string `json:"notes"` } if err := json.NewDecoder(r.Body).Decode(&update); err != nil { http.Error(w, "Invalid request", http.StatusBadRequest) return } // Get current document to check if category changed doc, err := GetDocument(id) if err != nil { http.Error(w, "Document not found", http.StatusNotFound) return } // Update in database if err := UpdateDocument(id, DocumentUpdate{ Title: update.Title, Category: update.Category, Notes: update.Notes, }); err != nil { http.Error(w, "Failed to update", http.StatusInternalServerError) return } // Move file if category changed if doc.Category != update.Category && doc.RecordPath != "" { newDir := filepath.Join(recordsDir, update.Category) os.MkdirAll(newDir, 0755) newPath := filepath.Join(newDir, filepath.Base(doc.RecordPath)) if err := os.Rename(doc.RecordPath, newPath); err == nil { UpdateDocumentRecordPath(id, newPath) } } w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(map[string]string{"status": "success"}) } func deleteDocumentHandler(w http.ResponseWriter, r *http.Request) { id := chi.URLParam(r, "id") doc, err := GetDocument(id) if err != nil { http.Error(w, "Document not found", http.StatusNotFound) return } // Delete from database (includes FTS and embeddings) DeleteDocument(id) // Delete record file if doc.RecordPath != "" { os.Remove(doc.RecordPath) } // Delete store files (PDF/TXT) for _, ext := range []string{".pdf", ".txt", ""} { path := filepath.Join(storeDir, id+ext) os.Remove(path) // ignore errors for non-existent files } w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(map[string]string{"status": "deleted"}) } func exportCSVHandler(w http.ResponseWriter, r *http.Request) { category := r.URL.Query().Get("category") var docs []Document if 
category != "" { docs, _ = GetDocumentsByCategory(category) } else { docs, _ = GetAllDocuments() } w.Header().Set("Content-Type", "text/csv") w.Header().Set("Content-Disposition", "attachment; filename=documents.csv") writer := csv.NewWriter(w) writer.Write([]string{"ID", "Title", "Category", "Type", "Date", "Amount", "Vendor", "Summary"}) for _, doc := range docs { writer.Write([]string{ doc.ID, doc.Title, doc.Category, doc.Type, doc.Date, doc.Amount, doc.Vendor, doc.Summary, }) } writer.Flush() } func debugStatsHandler(w http.ResponseWriter, r *http.Request) { stats, err := GetStats() w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(map[string]interface{}{ "error": err, "total": stats.TotalDocs, "recent": stats.RecentDocs, "uploadsCount": len(stats.RecentUploads), "recentUploads": stats.RecentUploads, }) } func createShareHandler(w http.ResponseWriter, r *http.Request) { id := chi.URLParam(r, "id") var req struct { Days int `json:"days"` } req.Days = 7 // default json.NewDecoder(r.Body).Decode(&req) token, err := CreateShare(id, req.Days) if err != nil { http.Error(w, "Failed to create share", http.StatusInternalServerError) return } w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(map[string]string{ "token": token, "url": "/s/" + token + ".pdf", }) } func deleteShareHandler(w http.ResponseWriter, r *http.Request) { token := chi.URLParam(r, "token") DeleteShare(token) w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(map[string]string{"status": "deleted"}) } func listSharesHandler(w http.ResponseWriter, r *http.Request) { id := chi.URLParam(r, "id") shares, err := GetSharesByDocument(id) if err != nil { shares = []Share{} } if shares == nil { shares = []Share{} } w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(shares) } func publicShareHandler(w http.ResponseWriter, r *http.Request) { token := chi.URLParam(r, "token") token = strings.TrimSuffix(token, 
".pdf") doc, err := GetShare(token) if err != nil || doc == nil { http.Error(w, "Not found", http.StatusNotFound) return } // Serve the PDF directly for _, ext := range []string{".pdf", ".txt", ""} { path := filepath.Join(storeDir, doc.ID+ext) if _, err := os.Stat(path); err == nil { if ext == ".pdf" || ext == "" { w.Header().Set("Content-Type", "application/pdf") } else { w.Header().Set("Content-Type", "text/plain") } // Set filename so Android/browsers handle it properly filename := strings.ReplaceAll(doc.Title, " ", "-") + ext if ext == "" { filename += ".pdf" } w.Header().Set("Content-Disposition", fmt.Sprintf(`inline; filename="%s"`, filename)) http.ServeFile(w, r, path) return } } http.Error(w, "File not found", http.StatusNotFound) } func reindexHandler(w http.ResponseWriter, r *http.Request) { // DISABLED - this was destructive (wiped all docs without repopulating) // Old behavior cleared all docs then re-indexed markdown files (which we don't use anymore) // TODO: Implement safe reprocessing that doesn't delete existing docs log.Printf("Reindex endpoint called but disabled (would wipe all data)") w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(map[string]string{"status": "reindexed"}) }