From 883f118d66fa2981dd2aaf8dab62a0a8d6242990 Mon Sep 17 00:00:00 2001 From: James Date: Mon, 23 Mar 2026 14:14:28 -0400 Subject: [PATCH] fix: glob for pdftoppm output filename instead of hardcoding page-1.png pdftoppm zero-pads the page number based on total page count: - <10 pages: page-1.png - <100 pages: page-01.png - <1000 pages: page-001.png The code hardcoded 'page-1.png' and 'page-N.png', which fails for any document with 10 or more pages. Use filepath.Glob('page-*.png') to find the actual output regardless of padding width. Fixed in both ConvertToImage() (first-page preview) and the multi-page OCR loop in ProcessPDFPageByPage(). --- ai.go | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/ai.go b/ai.go index 5e93877..470b43e 100644 --- a/ai.go +++ b/ai.go @@ -116,8 +116,14 @@ func ConvertToImage(filePath string) ([]byte, error) { return nil, fmt.Errorf("pdftoppm failed: %w", err) } - pngPath := filepath.Join(tmpDir, "page-1.png") - return os.ReadFile(pngPath) + // pdftoppm uses variable-width zero-padding depending on page count + // (e.g. page-01.png for <100 pages, page-001.png for <1000 pages). + // Glob for the first match instead of hardcoding "page-1.png". + matches, err := filepath.Glob(filepath.Join(tmpDir, "page-*.png")) + if err != nil || len(matches) == 0 { + return nil, fmt.Errorf("pdftoppm output not found in %s", tmpDir) + } + return os.ReadFile(matches[0]) } // Image files — read directly @@ -483,8 +489,13 @@ func ProcessPDFPageByPage(filePath string, jobID string) (string, error) { continue } - pngPath := filepath.Join(tmpDir, fmt.Sprintf("page-%d.png", page)) - imageData, err := os.ReadFile(pngPath) + // Glob for the output — pdftoppm zero-pads based on total page count + pageMatches, _ := filepath.Glob(filepath.Join(tmpDir, "page-*.png")) + if len(pageMatches) == 0 { + os.RemoveAll(tmpDir) + continue + } + imageData, err := os.ReadFile(pageMatches[0]) os.RemoveAll(tmpDir) if err != nil { continue