fix: glob for pdftoppm output filename instead of hardcoding page-1.png
pdftoppm zero-pads the page number based on total page count:
- <10 pages: page-1.png
- <100 pages: page-01.png
- <1000 pages: page-001.png
The code hardcoded 'page-1.png' and 'page-N.png', which fails for any
document with 10 or more pages, because the page number is then zero-padded.
Use filepath.Glob('page-*.png') to find the actual output regardless of
padding width.
Fixed in both ConvertToImage() (first-page preview) and the multi-page
OCR loop in ProcessPDFPageByPage().
This commit is contained in:
parent
9622ab9390
commit
883f118d66
19
ai.go
19
ai.go
|
|
@ -116,8 +116,14 @@ func ConvertToImage(filePath string) ([]byte, error) {
|
||||||
return nil, fmt.Errorf("pdftoppm failed: %w", err)
|
return nil, fmt.Errorf("pdftoppm failed: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
pngPath := filepath.Join(tmpDir, "page-1.png")
|
// pdftoppm uses variable-width zero-padding depending on page count
|
||||||
return os.ReadFile(pngPath)
|
// (e.g. page-01.png for <100 pages, page-001.png for <1000 pages).
|
||||||
|
// Glob for the first match instead of hardcoding "page-1.png".
|
||||||
|
matches, err := filepath.Glob(filepath.Join(tmpDir, "page-*.png"))
|
||||||
|
if err != nil || len(matches) == 0 {
|
||||||
|
return nil, fmt.Errorf("pdftoppm output not found in %s", tmpDir)
|
||||||
|
}
|
||||||
|
return os.ReadFile(matches[0])
|
||||||
}
|
}
|
||||||
|
|
||||||
// Image files — read directly
|
// Image files — read directly
|
||||||
|
|
@ -483,8 +489,13 @@ func ProcessPDFPageByPage(filePath string, jobID string) (string, error) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
pngPath := filepath.Join(tmpDir, fmt.Sprintf("page-%d.png", page))
|
// Glob for the output — pdftoppm zero-pads based on total page count
|
||||||
imageData, err := os.ReadFile(pngPath)
|
pageMatches, _ := filepath.Glob(filepath.Join(tmpDir, "page-*.png"))
|
||||||
|
if len(pageMatches) == 0 {
|
||||||
|
os.RemoveAll(tmpDir)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
imageData, err := os.ReadFile(pageMatches[0])
|
||||||
os.RemoveAll(tmpDir)
|
os.RemoveAll(tmpDir)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
continue
|
continue
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue