fix: format=md endpoint now returns full OCR text (full_text field)
SearchDocuments excludes full_text for performance. The MD endpoint needs the actual OCR content, not just the summary. Added SearchDocumentsWithFullText() and SearchDocumentsWithFullTextFallback() that select full_text explicitly. apiSearchMDHandler now uses these, so format=md returns the complete OCR/markdown text for each document.
This commit is contained in:
parent
405a6f697f
commit
9622ab9390
75
db.go
75
db.go
|
|
@ -715,3 +715,78 @@ func scanDocumentRows(rows *sql.Rows) ([]Document, error) {
|
||||||
}
|
}
|
||||||
return docs, rows.Err()
|
return docs, rows.Err()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SearchDocumentsWithFullText is like SearchDocuments but includes the full_text
|
||||||
|
// column. Used by the format=md endpoint where OCR content is needed.
|
||||||
|
func SearchDocumentsWithFullText(query string, limit int) ([]Document, error) {
|
||||||
|
if limit <= 0 {
|
||||||
|
limit = 200
|
||||||
|
}
|
||||||
|
|
||||||
|
rows, err := db.Query(`
|
||||||
|
SELECT d.id, COALESCE(d.title,''), COALESCE(d.category,''), COALESCE(d.type,''),
|
||||||
|
COALESCE(d.date,''), COALESCE(d.amount,''), COALESCE(d.vendor,''),
|
||||||
|
COALESCE(d.summary,''), COALESCE(d.full_text,''), COALESCE(d.pdf_path,''),
|
||||||
|
COALESCE(d.processed_at,''), COALESCE(d.original_file,''), COALESCE(d.status,'ready')
|
||||||
|
FROM documents d
|
||||||
|
JOIN documents_fts fts ON d.id = fts.id
|
||||||
|
WHERE documents_fts MATCH ?
|
||||||
|
ORDER BY rank
|
||||||
|
LIMIT ?
|
||||||
|
`, query, limit)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer rows.Close()
|
||||||
|
|
||||||
|
var docs []Document
|
||||||
|
for rows.Next() {
|
||||||
|
var doc Document
|
||||||
|
if err := rows.Scan(
|
||||||
|
&doc.ID, &doc.Title, &doc.Category, &doc.Type, &doc.Date,
|
||||||
|
&doc.Amount, &doc.Vendor, &doc.Summary, &doc.FullText, &doc.PDFPath,
|
||||||
|
&doc.ProcessedAt, &doc.OriginalFile, &doc.Status,
|
||||||
|
); err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
docs = append(docs, doc)
|
||||||
|
}
|
||||||
|
return docs, rows.Err()
|
||||||
|
}
|
||||||
|
|
||||||
|
// SearchDocumentsWithFullTextFallback is the LIKE-based fallback that also includes full_text.
|
||||||
|
func SearchDocumentsWithFullTextFallback(query string, limit int) ([]Document, error) {
|
||||||
|
if limit <= 0 {
|
||||||
|
limit = 200
|
||||||
|
}
|
||||||
|
pattern := "%" + query + "%"
|
||||||
|
|
||||||
|
rows, err := db.Query(`
|
||||||
|
SELECT id, COALESCE(title,''), COALESCE(category,''), COALESCE(type,''),
|
||||||
|
COALESCE(date,''), COALESCE(amount,''), COALESCE(vendor,''),
|
||||||
|
COALESCE(summary,''), COALESCE(full_text,''), COALESCE(pdf_path,''),
|
||||||
|
COALESCE(processed_at,''), COALESCE(original_file,''), COALESCE(status,'ready')
|
||||||
|
FROM documents
|
||||||
|
WHERE title LIKE ? OR summary LIKE ? OR vendor LIKE ? OR full_text LIKE ?
|
||||||
|
ORDER BY processed_at DESC
|
||||||
|
LIMIT ?
|
||||||
|
`, pattern, pattern, pattern, pattern, limit)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer rows.Close()
|
||||||
|
|
||||||
|
var docs []Document
|
||||||
|
for rows.Next() {
|
||||||
|
var doc Document
|
||||||
|
if err := rows.Scan(
|
||||||
|
&doc.ID, &doc.Title, &doc.Category, &doc.Type, &doc.Date,
|
||||||
|
&doc.Amount, &doc.Vendor, &doc.Summary, &doc.FullText, &doc.PDFPath,
|
||||||
|
&doc.ProcessedAt, &doc.OriginalFile, &doc.Status,
|
||||||
|
); err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
docs = append(docs, doc)
|
||||||
|
}
|
||||||
|
return docs, rows.Err()
|
||||||
|
}
|
||||||
|
|
|
||||||
4
main.go
4
main.go
|
|
@ -390,9 +390,9 @@ func apiSearchMDHandler(w http.ResponseWriter, r *http.Request) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
docs, err := SearchDocuments(query, 200)
|
docs, err := SearchDocumentsWithFullText(query, 200)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
docs, _ = SearchDocumentsFallback(query, 200)
|
docs, _ = SearchDocumentsWithFullTextFallback(query, 200)
|
||||||
}
|
}
|
||||||
|
|
||||||
w.Header().Set("Content-Type", "text/markdown; charset=utf-8")
|
w.Header().Set("Content-Type", "text/markdown; charset=utf-8")
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue