// pulse-monitor/timestamp_ocr.go
//
// 86 lines
// 2.5 KiB
// Go

package main
import (
"fmt"
"image"
"strings"
"time"
"github.com/otiai10/gosseract/v2"
"gocv.io/x/gocv"
)
// timestampOCRClient is the package-level OCR client that is reused across
// calls to extractTimestamp. It is assigned by InitTimestampOCR and closed by
// CloseTimestampOCR; it is nil until InitTimestampOCR has run.
var timestampOCRClient *gosseract.Client
// InitTimestampOCR creates the OCR client that timestamp extraction reuses
// and stores it in timestampOCRClient. The client is configured for
// single-line page segmentation and restricted to the characters that can
// appear in a "YYYY-MM-DD HH:MM:SS" timestamp (digits, '-', ':' and space).
// Any result of the two configuration calls is not inspected.
func InitTimestampOCR() {
	client := gosseract.NewClient()
	client.SetPageSegMode(gosseract.PSM_SINGLE_LINE)
	client.SetWhitelist("0123456789-: ")

	// Publish the client only once it has been configured.
	timestampOCRClient = client
}
// CloseTimestampOCR closes the shared OCR client, if there is one, and clears
// the package-level reference. Clearing the reference makes repeated calls
// no-ops (the same client is never closed twice) and prevents later code from
// picking up a client that has already been released.
func CloseTimestampOCR() {
	if timestampOCRClient == nil {
		return
	}
	timestampOCRClient.Close()
	// Drop the pointer so the closed client cannot be reused or closed again.
	timestampOCRClient = nil
}
// extractTimestamp OCRs the timestamp in the top-left 1024x68 pixels of frame
// and reports how far the current wall clock is from it.
//
// The region is cropped, downscaled to half size, converted to grayscale,
// Otsu-thresholded and inverted, then passed to the shared OCR client as a
// PNG. The recognized text must match the layout "2006-01-02 15:04:05" and is
// interpreted in the local time zone.
//
// Returns: (secondsDifference, ocrDurationMs, error)
//   - secondsDifference: whole seconds elapsed since the parsed camera time
//     (negative if the camera time lies in the future); 0 on error.
//   - ocrDurationMs: milliseconds spent in this call; also set on error.
//   - error: non-nil when the OCR client is not initialized, the frame is
//     empty or smaller than the timestamp region, or PNG encoding, OCR or
//     timestamp parsing fails.
func extractTimestamp(frame gocv.Mat) (int, int64, error) {
	const (
		regionWidth     = 1024
		regionHeight    = 68
		timestampLayout = "2006-01-02 15:04:05"
	)

	startTime := time.Now()

	// Without a client the OCR calls below would dereference a nil pointer.
	if timestampOCRClient == nil {
		return 0, time.Since(startTime).Milliseconds(), fmt.Errorf("timestamp OCR client is not initialized")
	}

	// Region needs the rectangle to lie inside the frame, so reject frames
	// that are empty or too small before cropping.
	if frame.Empty() || frame.Cols() < regionWidth || frame.Rows() < regionHeight {
		return 0, time.Since(startTime).Milliseconds(), fmt.Errorf(
			"frame %dx%d is smaller than the %dx%d timestamp region",
			frame.Cols(), frame.Rows(), regionWidth, regionHeight)
	}

	// Extract the 1024x68 timestamp region.
	timestampRegion := frame.Region(image.Rect(0, 0, regionWidth, regionHeight))
	defer timestampRegion.Close()

	// Downscale 50% (reduces OCR processing time).
	downscaled := gocv.NewMat()
	defer downscaled.Close()
	gocv.Resize(timestampRegion, &downscaled, image.Pt(regionWidth/2, regionHeight/2), 0, 0, gocv.InterpolationArea)

	// Grayscale.
	// NOTE(review): assumes frame is a 3-channel BGR image — confirm against callers.
	gray := gocv.NewMat()
	defer gray.Close()
	gocv.CvtColor(downscaled, &gray, gocv.ColorBGRToGray)

	// Binary threshold (Otsu automatically finds the best threshold).
	binary := gocv.NewMat()
	defer binary.Close()
	gocv.Threshold(gray, &binary, 0, 255, gocv.ThresholdBinary|gocv.ThresholdOtsu)

	// Invert (white text on black → black text on white for better OCR).
	inverted := gocv.NewMat()
	defer inverted.Close()
	gocv.BitwiseNot(binary, &inverted)

	// Encode as PNG (lossless, better for text).
	buf, err := gocv.IMEncode(".png", inverted)
	if err != nil {
		return 0, time.Since(startTime).Milliseconds(), fmt.Errorf("failed to encode: %w", err)
	}
	defer buf.Close()

	// OCR with the reused client instead of creating one per frame.
	if err := timestampOCRClient.SetImageFromBytes(buf.GetBytes()); err != nil {
		return 0, time.Since(startTime).Milliseconds(), fmt.Errorf("failed to set image: %w", err)
	}
	text, err := timestampOCRClient.Text()
	if err != nil {
		return 0, time.Since(startTime).Milliseconds(), fmt.Errorf("OCR failed: %w", err)
	}

	// Parse the timestamp; the parse error is wrapped so callers can inspect
	// the cause, consistent with the other failure paths.
	text = strings.TrimSpace(strings.ReplaceAll(text, "\n", ""))
	cameraTime, err := time.ParseInLocation(timestampLayout, text, time.Local)
	if err != nil {
		return 0, time.Since(startTime).Milliseconds(), fmt.Errorf("could not parse timestamp from: '%s': %w", text, err)
	}

	return int(time.Since(cameraTime).Seconds()), time.Since(startTime).Milliseconds(), nil
}