// pulse-monitor/backups/backup_20251127/ocr.go
// (570 lines, 19 KiB, Go)

package main
import (
"fmt"
"image"
"image/color"
"io"
"os"
"strings"
"time"
"gocv.io/x/gocv"
)
// loadTemplates reads the template images used for digit recognition.
//
// Every "*.png" directly inside training_digits whose name (up to the first
// "_") begins with an integer in the range 0-9 is loaded as a grayscale
// template for that digit. Every "*.png" inside training_digits/invalid is
// loaded as an "invalid pattern" template and stored under key -1.
//
// The returned error is always nil: an unreadable training_digits directory is
// reported as a warning and yields an empty map, and an unreadable
// training_digits/invalid directory is silently ignored. Files that cannot be
// decoded are skipped with a warning.
//
// The Mats stored in the returned map are not closed by this function.
func loadTemplates() (map[int][]gocv.Mat, error) {
	templates := make(map[int][]gocv.Mat)

	files, err := os.ReadDir("training_digits")
	if err != nil {
		logMessage(Console, Warning, "⚠️ WARNING: training_digits directory not found or cannot be read: %v", err)
		logMessage(Console, Warning, "⚠️ Starting with empty template set. Recognition will not work until templates are added.")
		logMessage(Console, Info, "")
		return templates, nil
	}

	templateCount := 0
	for _, file := range files {
		if !strings.HasSuffix(file.Name(), ".png") {
			continue
		}

		// The digit is encoded as the leading part of the file name, before
		// the first underscore.
		name := strings.TrimSuffix(file.Name(), ".png")
		prefix := strings.Split(name, "_")[0]
		var digit int
		if _, err := fmt.Sscanf(prefix, "%d", &digit); err != nil || digit < 0 || digit > 9 {
			continue
		}

		filename := fmt.Sprintf("training_digits/%s", file.Name())
		template := gocv.IMRead(filename, gocv.IMReadGrayScale)
		if template.Empty() {
			logMessage(Console, Warning, "Warning: Failed to load %s", filename)
			// Even an empty Mat owns a native handle; release it before
			// dropping the value.
			template.Close()
			continue
		}
		templates[digit] = append(templates[digit], template)
		templateCount++
	}

	// Load invalid digit markers (use index -1 for invalid patterns). The
	// directory is optional, so a read error is not reported.
	if invalidFiles, err := os.ReadDir("training_digits/invalid"); err == nil {
		invalidCount := 0
		for _, file := range invalidFiles {
			if !strings.HasSuffix(file.Name(), ".png") {
				continue
			}
			filename := fmt.Sprintf("training_digits/invalid/%s", file.Name())
			template := gocv.IMRead(filename, gocv.IMReadGrayScale)
			if template.Empty() {
				logMessage(Console, Warning, "Warning: Failed to load invalid template %s", filename)
				template.Close()
				continue
			}
			templates[-1] = append(templates[-1], template)
			invalidCount++
		}
		if invalidCount > 0 {
			logMessage(Console, Info, "✓ Loaded %d invalid digit markers", invalidCount)
		}
	}

	// Warn about every digit that has no template at all.
	for digit := 0; digit <= 9; digit++ {
		if len(templates[digit]) == 0 {
			logMessage(Console, Warning, "⚠️ WARNING: No templates found for digit %d", digit)
		}
	}
	logMessage(Console, Info, "✓ Loaded %d digit templates (0-9)", templateCount)
	return templates, nil
}
// matchDigit compares digitImg against every loaded template and returns the
// best-matching digit together with its score.
//
// The score of one comparison is the percentage (0-100) of pixels that are
// exactly equal between the image and the template; the image is resized to the
// template's size first when the two differ.
//
// Returns:
//   - (-1, bestInvalidScore) when the best "invalid pattern" template
//     (templates[-1]) scores above 70 and beats every digit template;
//   - otherwise (bestDigit, bestScore), where bestDigit is -1 and bestScore is
//     0 if no digit template scored above 0 (including when digitImg is empty).
func matchDigit(digitImg gocv.Mat, templates map[int][]gocv.Mat) (int, float64) {
	// An empty crop cannot be resized or compared; report "no match".
	if digitImg.Empty() {
		return -1, 0.0
	}

	// similarity scores digitImg against one template. Temporary Mats are
	// released when the closure returns, i.e. once per template rather than
	// piling up until matchDigit itself returns.
	similarity := func(template gocv.Mat) float64 {
		compareImg := digitImg
		if digitImg.Rows() != template.Rows() || digitImg.Cols() != template.Cols() {
			resized := gocv.NewMat()
			defer resized.Close()
			gocv.Resize(digitImg, &resized, image.Pt(template.Cols(), template.Rows()), 0, 0, gocv.InterpolationLinear)
			compareImg = resized
		}
		diff := gocv.NewMat()
		defer diff.Close()
		gocv.AbsDiff(compareImg, template, &diff)
		totalPixels := compareImg.Rows() * compareImg.Cols()
		samePixels := totalPixels - gocv.CountNonZero(diff)
		return (float64(samePixels) / float64(totalPixels)) * 100.0
	}

	// bestOf returns the highest score among candidates, or 0 if there are none.
	bestOf := func(candidates []gocv.Mat) float64 {
		best := 0.0
		for _, template := range candidates {
			if score := similarity(template); score > best {
				best = score
			}
		}
		return best
	}

	// Invalid patterns are stored at index -1.
	bestInvalidScore := bestOf(templates[-1])

	// Regular digits 0-9; ties keep the lower digit.
	bestDigit := -1
	bestScore := 0.0
	for digit := 0; digit <= 9; digit++ {
		if score := bestOf(templates[digit]); score > bestScore {
			bestScore = score
			bestDigit = digit
		}
	}

	// If an invalid pattern matches better than any digit (and score > 70%), return -1.
	if bestInvalidScore > bestScore && bestInvalidScore > 70 {
		return -1, bestInvalidScore
	}
	return bestDigit, bestScore
}
// hasOneAt reports whether the slot ending at column x of thresh contains a '1'.
//
// The slot is the full-height strip from x-DIGIT_ONE_WIDTH to x, with the left
// edge clipped to 0 and x clipped to the image width. The strip is compared
// ONLY against the '1' templates (templates[1]) to prevent false matches with
// other digits such as '4' or '7'. The score is the percentage of exactly equal
// pixels, and the best score must exceed 85 for a positive result.
//
// It returns false when the clipped strip is empty or when no '1' templates are
// loaded.
//
// logger is not used by this function; output goes through logMessage.
func hasOneAt(thresh gocv.Mat, x int, templates map[int][]gocv.Mat, logger io.Writer) bool {
	h := thresh.Rows()
	w := thresh.Cols()

	// Calculate the region where a '1' would be if it exists.
	leftEdge := x - DIGIT_ONE_WIDTH
	if leftEdge < 0 {
		leftEdge = 0
	}
	if x > w {
		x = w
	}
	logMessage(Both, Debug, " hasOneAt(x=%d): extracting region [%d..%d] (width=%d) from display width=%d",
		x, leftEdge, x, x-leftEdge, w)

	// A non-positive x (display narrower than the accumulated digit widths)
	// would produce an empty or inverted rectangle, which is not a valid
	// region to crop.
	if x <= leftEdge || h <= 0 {
		logMessage(Both, Debug, " hasOneAt(x=%d): empty region -> NO", x)
		return false
	}

	// Match ONLY against '1' templates (don't check other digits). Checked
	// before cropping so that nothing is allocated when there is nothing to
	// compare against.
	oneTemplates := templates[1]
	if len(oneTemplates) == 0 {
		logMessage(LogFile, Warning, " hasOneAt(x=%d): No '1' templates loaded -> NO", x)
		return false
	}

	// Extract the potential '1' region.
	region := thresh.Region(image.Rect(leftEdge, 0, x, h))
	digitImg := region.Clone()
	region.Close()
	defer digitImg.Close()

	// scoreAgainst compares the crop with one template; its temporaries are
	// released per call instead of accumulating until hasOneAt returns.
	scoreAgainst := func(template gocv.Mat) float64 {
		compareImg := digitImg
		if digitImg.Rows() != template.Rows() || digitImg.Cols() != template.Cols() {
			resized := gocv.NewMat()
			defer resized.Close()
			gocv.Resize(digitImg, &resized, image.Pt(template.Cols(), template.Rows()), 0, 0, gocv.InterpolationLinear)
			compareImg = resized
		}
		diff := gocv.NewMat()
		defer diff.Close()
		gocv.AbsDiff(compareImg, template, &diff)
		totalPixels := compareImg.Rows() * compareImg.Cols()
		samePixels := totalPixels - gocv.CountNonZero(diff)
		return (float64(samePixels) / float64(totalPixels)) * 100.0
	}

	bestScore := 0.0
	for _, template := range oneTemplates {
		if score := scoreAgainst(template); score > bestScore {
			bestScore = score
		}
	}

	// If it matches '1' with >85% confidence, we have a '1'.
	if bestScore > 85.0 {
		logMessage(Both, Debug, " hasOneAt(x=%d): matched '1' with %.1f%% confidence -> YES", x, bestScore)
		return true
	}
	logMessage(Both, Debug, " hasOneAt(x=%d): best '1' match %.1f%% confidence -> NO", x, bestScore)
	return false
}
// validateEight checks whether a digit image has the two characteristic holes
// of an '8'.
//
// It scans two horizontal lines, at 30% and 70% of the image height, over the
// columns from 40% to 50% of the width (inclusive), and reports true only if
// each line contains at least one pixel darker than 128. In the thresholded
// image black (0) is taken to be a hole and white (255) the digit stroke.
//
// An empty image yields false. Pixels are read with GetUCharAt, so the image is
// assumed to be single-channel 8-bit — TODO confirm against callers.
//
// logger is not used by this function; output goes through logMessage.
func validateEight(digitImg gocv.Mat, logger io.Writer) bool {
	h := digitImg.Rows()
	w := digitImg.Cols()

	// With no pixels there is nothing to sample; reading (0,0) would be an
	// out-of-range access on the native buffer.
	if h <= 0 || w <= 0 {
		logMessage(LogFile, Debug, " validateEight: empty image -> no holes")
		return false
	}

	// Upper hole: 30% of height
	// Lower hole: 70% of height
	// Horizontal range: 40%-50% of width
	topHoleY := (h * 30) / 100
	bottomHoleY := (h * 70) / 100
	xStart := (w * 40) / 100
	xEnd := (w * 50) / 100

	// rowHasBlack reports whether ANY pixel on row y within [xStart, xEnd] is
	// black.
	rowHasBlack := func(y int) bool {
		for x := xStart; x <= xEnd; x++ {
			if digitImg.GetUCharAt(y, x) < 128 {
				return true
			}
		}
		return false
	}

	topHoleFound := rowHasBlack(topHoleY)
	bottomHoleFound := rowHasBlack(bottomHoleY)

	logMessage(LogFile, Debug, " validateEight: top hole @ y=%d (30%%) x=%d-%d (%v), bottom hole @ y=%d (70%%) x=%d-%d (%v)",
		topHoleY, xStart, xEnd, topHoleFound,
		bottomHoleY, xStart, xEnd, bottomHoleFound)
	return topHoleFound && bottomHoleFound
}
// validateZero checks whether a digit image has the characteristic hole of a
// '0'.
//
// It scans the horizontal line at half the image height over the columns from
// 30% to 70% of the width (inclusive) and reports true if at least one pixel is
// darker than 128. In the thresholded image black (0) is taken to be the hole
// and white (255) the digit stroke.
//
// An empty image yields false. Pixels are read with GetUCharAt, so the image is
// assumed to be single-channel 8-bit — TODO confirm against callers.
//
// logger is not used by this function; output goes through logMessage.
func validateZero(digitImg gocv.Mat, logger io.Writer) bool {
	h := digitImg.Rows()
	w := digitImg.Cols()

	// With no pixels there is nothing to sample; reading (0,0) would be an
	// out-of-range access on the native buffer.
	if h <= 0 || w <= 0 {
		logMessage(LogFile, Debug, " validateZero: empty image -> no hole")
		return false
	}

	// Center hole: 50% of height
	// Horizontal range: 30%-70% of width (wider range than '8' since '0' hole is larger)
	centerY := h / 2
	xStart := (w * 30) / 100
	xEnd := (w * 70) / 100

	// Check for ANY black pixel in the horizontal range.
	holeFound := false
	for x := xStart; x <= xEnd && !holeFound; x++ {
		holeFound = digitImg.GetUCharAt(centerY, x) < 128
	}

	logMessage(LogFile, Debug, " validateZero: center hole @ y=%d (50%%) x=%d-%d (%v)",
		centerY, xStart, xEnd, holeFound)
	return holeFound
}
// recognizeDisplayArea reads the number shown in one display region of an
// already-thresholded (binary) frame. The region is cut into three fixed-width
// digit slots, determined right to left, and each slot is template-matched.
//
// Parameters:
//   - rotated: the thresholded frame; area is cropped out of it.
//   - area: rectangle of the display inside rotated.
//   - templates: digit templates keyed 0-9, invalid patterns under key -1.
//   - displayName: label used in log lines and, lower-cased and trimmed, in the
//     names of the review images.
//   - frameCount: frame index used in the names of the review images.
//   - logger: forwarded to hasOneAt, validateEight and validateZero.
//
// Returns (number, num2, score2, num3, score3):
//   - number is 100 + num2*10 + num3 when the leftmost slot was detected as a
//     '1', otherwise num2*10 + num3;
//   - num2/score2 and num3/score3 are the recognized digit and its match score
//     for the middle and right slot. A digit is -1 (score 0) when its crop is
//     empty, when nothing matched, or when an 8/0 failed hole validation; it is
//     -1 with the invalid pattern's score when an invalid pattern matched best.
//
// NOTE(review): number is computed arithmetically even when num2 or num3 is -1
// (e.g. 7 and -1 give 69), so callers presumably need to check num2/num3
// before trusting number — confirm against callers.
//
// Side effects: always writes review/f<frame>_<name>_checks.png and
// review/f<frame>_<name>_full.png; also writes ..._step1_extracted.png when
// DEBUG_MODE is set and ..._digit1..3.png when SAVE_CROPS is set.
func recognizeDisplayArea(rotated gocv.Mat, area image.Rectangle, templates map[int][]gocv.Mat, displayName string, frameCount int, logger io.Writer) (int, int, float64, int, float64) {
	startTime := time.Now()
	cleanName := strings.TrimSpace(strings.ToLower(displayName))

	// slotWidth is the fixed pixel width reserved for a digit slot.
	slotWidth := func(isOne bool) int {
		if isOne {
			return DIGIT_ONE_WIDTH
		}
		return DIGIT_NON_ONE_WIDTH
	}
	// slotKind is the short tag used for a slot in the summary log line.
	slotKind := func(isOne bool) string {
		if isOne {
			return "1"
		}
		return "X"
	}

	// Extract the display area (ALREADY THRESHOLDED BINARY IMAGE).
	region := rotated.Region(area)
	thresh := region.Clone()
	region.Close()
	defer thresh.Close()

	// DEBUG: Save extracted region - already thresholded.
	if DEBUG_MODE {
		step1Filename := fmt.Sprintf("review/f%d_%s_step1_extracted.png", frameCount, cleanName)
		gocv.IMWrite(step1Filename, thresh)
	}

	w := thresh.Cols()
	h := thresh.Rows()
	logMessage(LogFile, Debug, " %s display: w=%d, h=%d", displayName, w, h)

	// Sequential check for '1' patterns to determine digit widths.
	// Working right to left (digit3, digit2, digit1), hasOneAt template-matches
	// the strip that ends at checkPos against the '1' templates; the result
	// selects the slot width, and the accumulated width gives the cut position.
	// cut stores cut positions, digitIsOne stores detection results.
	cut := make([]int, 4)         // indices 1,2,3 (0 unused)
	digitIsOne := make([]bool, 4) // indices 1,2,3 (0 unused)
	totalWidth := 0
	logMessage(Both, Debug, " [%s] Display width=%d, starting digit detection...", displayName, w)
	detectionStart := time.Now()
	for i := 3; i >= 1; i-- {
		checkPos := w // rightmost digit: check at the right edge
		if i != 3 {
			checkPos = w - totalWidth - 5
		}
		logMessage(Both, Debug, " [%s] Digit %d: checkPos=%d (w=%d, totalWidth=%d)", displayName, i, checkPos, w, totalWidth)
		digitIsOne[i] = hasOneAt(thresh, checkPos, templates, logger)

		widthUsed := slotWidth(digitIsOne[i])
		totalWidth += widthUsed
		if digitIsOne[i] {
			logMessage(Both, Debug, " [%s] Digit %d: IS ONE -> using width=%d (totalWidth now %d)", displayName, i, widthUsed, totalWidth)
		} else {
			logMessage(Both, Debug, " [%s] Digit %d: NOT ONE -> using width=%d (totalWidth now %d)", displayName, i, widthUsed, totalWidth)
		}
		cut[i] = w - totalWidth
		logMessage(Both, Debug, " [%s] Digit %d: CUT position D[%d]=%d", displayName, i, i, cut[i])
	}
	logMessage(LogFile, Debug, " [TIMING][%s] Digit detection (hasOneAt x3): %dms", displayName, time.Since(detectionStart).Milliseconds())
	logMessage(Both, Debug, " [%s] Final cut positions: D[1]=%d, D[2]=%d, D[3]=%d", displayName, cut[1], cut[2], cut[3])

	digit2Width := slotWidth(digitIsOne[2])
	digit3Width := slotWidth(digitIsOne[3])

	// Create visualization image showing check positions AND cut positions.
	visImg := gocv.NewMat()
	gocv.CvtColor(thresh, &visImg, gocv.ColorGrayToBGR)

	// gocv's drawing functions take a color.RGBA and reorder the channels for
	// OpenCV themselves, so colors are given in plain R,G,B order here.
	greenColor := color.RGBA{R: 0, G: 255, B: 0, A: 255}    // '1' detected
	redColor := color.RGBA{R: 255, G: 0, B: 0, A: 255}      // no '1'
	yellowColor := color.RGBA{R: 255, G: 255, B: 0, A: 255} // CUT lines

	// markCheck draws a labelled vertical line at a '1' check position.
	markCheck := func(label string, x, labelY int, isOne bool) {
		lineColor := redColor
		if isOne {
			lineColor = greenColor
		}
		gocv.Line(&visImg, image.Pt(x, 0), image.Pt(x, h), lineColor, 2)
		gocv.PutText(&visImg, label, image.Pt(x+5, labelY), gocv.FontHersheyDuplex, 0.5, lineColor, 1)
	}
	// markCut draws a labelled vertical line where the digits are split.
	markCut := func(label string, x, labelY int) {
		gocv.Line(&visImg, image.Pt(x, 0), image.Pt(x, h), yellowColor, 3)
		gocv.PutText(&visImg, label, image.Pt(x+5, labelY), gocv.FontHersheyDuplex, 0.6, yellowColor, 2)
	}

	// The D3 marker is drawn at w-8 although the actual check for digit 3 uses
	// x=w. NOTE(review): presumably inset so the line and label stay inside the
	// image — confirm.
	markCheck("D3", w-8, 20, digitIsOne[3])
	markCheck("D2", w-digit3Width-5, 40, digitIsOne[2])
	markCheck("D1", w-digit3Width-digit2Width-5, 60, digitIsOne[1])
	markCut("CUT1", cut[2], h-10)
	markCut("CUT2", cut[3], h-30)

	// Save visualization (always - needed for debugging failures).
	visFilename := fmt.Sprintf("review/f%d_%s_checks.png", frameCount, cleanName)
	gocv.IMWrite(visFilename, visImg)
	visImg.Close()

	// Clamp cut positions to valid range. This happens after the visualization
	// so the image shows the unclamped positions.
	if cut[3] < 0 {
		cut[3] = 0
	}
	if cut[2] < 0 {
		cut[2] = 0
	}

	// Extract three digit regions: crop i spans bounds[i]..bounds[i+1].
	bounds := [5]int{0, 0, cut[2], cut[3], w}
	var crops [4]gocv.Mat // indices 1,2,3 (0 unused)
	for i := 1; i <= 3; i++ {
		lo, hi := bounds[i], bounds[i+1]
		logMessage(Both, Debug, " [%s] Extracting digit%d: region [%d..%d] (width=%d)", displayName, i, lo, hi, hi-lo)
		roi := thresh.Region(image.Rect(lo, 0, hi, h))
		crops[i] = roi.Clone()
		roi.Close()
	}
	defer func() {
		for i := 1; i <= 3; i++ {
			crops[i].Close()
		}
	}()

	// Match all three digits. A crop that is empty (cut position clamped to 0)
	// is reported as unrecognized instead of being handed to the matcher.
	matchStart := time.Now()
	var nums [4]int
	var scores [4]float64
	for i := 1; i <= 3; i++ {
		if crops[i].Empty() {
			nums[i], scores[i] = -1, 0.0
			continue
		}
		nums[i], scores[i] = matchDigit(crops[i], templates)
	}
	logMessage(LogFile, Debug, " [TIMING][%s] matchDigit x3: %dms", displayName, time.Since(matchStart).Milliseconds())

	// Validate '8' digits (two holes) first, then '0' digits (center hole),
	// each over digit1..digit3. A failed validation marks the digit invalid.
	holeChecks := []struct {
		digit    int
		validate func(gocv.Mat, io.Writer) bool
	}{
		{8, validateEight},
		{0, validateZero},
	}
	for _, check := range holeChecks {
		for i := 1; i <= 3; i++ {
			if nums[i] == check.digit && !check.validate(crops[i], logger) {
				logMessage(LogFile, Debug, " ⚠️ Digit%d recognized as %d but failed hole validation - marking invalid", i, check.digit)
				nums[i] = -1
				scores[i] = 0.0
			}
		}
	}

	// Calculate final number.
	// 2-digit number: XX (digit1 region is mostly empty/padding).
	number := nums[2]*10 + nums[3]
	if digitIsOne[1] {
		// 3-digit number: 1XX
		number += 100
	}

	// Save individual digits (only when SAVE_CROPS flag is set - for approving templates).
	if SAVE_CROPS {
		saveStart := time.Now()
		for i := 1; i <= 3; i++ {
			gocv.IMWrite(fmt.Sprintf("review/f%d_%s_digit%d.png", frameCount, cleanName, i), crops[i])
		}
		logMessage(LogFile, Debug, " [TIMING][%s] Save digit images: %dms", displayName, time.Since(saveStart).Milliseconds())
	}

	// Create labeled full image (all three digits side by side).
	combinedWidth := 0
	combinedHeight := 0
	for i := 1; i <= 3; i++ {
		combinedWidth += crops[i].Cols()
		if crops[i].Rows() > combinedHeight {
			combinedHeight = crops[i].Rows()
		}
	}
	fullForLabel := gocv.NewMatWithSize(combinedHeight, combinedWidth, crops[1].Type())
	xOffset := 0
	for i := 1; i <= 3; i++ {
		roi := fullForLabel.Region(image.Rect(xOffset, 0, xOffset+crops[i].Cols(), crops[i].Rows()))
		crops[i].CopyTo(&roi)
		roi.Close()
		xOffset += crops[i].Cols()
	}

	// Add padding at top for label.
	labeledFull := gocv.NewMat()
	gocv.CopyMakeBorder(fullForLabel, &labeledFull, 60, 0, 0, 0, gocv.BorderConstant, color.RGBA{0, 0, 0, 255})
	fullForLabel.Close()

	// Draw label.
	label := fmt.Sprintf("%d", number)
	textColor := color.RGBA{255, 255, 255, 255}
	gocv.PutText(&labeledFull, label, image.Pt(10, 45), gocv.FontHersheyDuplex, 1.8, textColor, 3)

	// Save labeled full image (always - needed for debugging failures).
	fullFilename := fmt.Sprintf("review/f%d_%s_full.png", frameCount, cleanName)
	gocv.IMWrite(fullFilename, labeledFull)
	labeledFull.Close()

	// Print debug info to log only.
	if digitIsOne[1] {
		logMessage(LogFile, Info, " %s: 1%d%d [3-DIGIT: d1=1@%dpx (x=0-%d), d2=%s@%dpx (x=%d-%d), d3=%s@%dpx (x=%d-%d)] (scores: %d=%.0f%%, %d=%.0f%%, %d=%.0f%%)",
			displayName, nums[2], nums[3], cut[2], cut[2],
			slotKind(digitIsOne[2]), digit2Width, cut[2], cut[3],
			slotKind(digitIsOne[3]), digit3Width, cut[3], w,
			nums[1], scores[1], nums[2], scores[2], nums[3], scores[3])
	} else {
		logMessage(LogFile, Info, " %s: %d%d [2-DIGIT: d2=%s@%dpx (x=%d-%d), d3=%s@%dpx (x=%d-%d)] (scores: %d=%.0f%%, %d=%.0f%%)",
			displayName, nums[2], nums[3],
			slotKind(digitIsOne[2]), digit2Width, cut[2], cut[3],
			slotKind(digitIsOne[3]), digit3Width, cut[3], w,
			nums[2], scores[2], nums[3], scores[3])
	}
	logMessage(LogFile, Debug, " [TIMING][%s] Total recognizeDisplayArea: %dms", displayName, time.Since(startTime).Milliseconds())

	// Return using middle two digits for compatibility with existing code.
	return number, nums[2], scores[2], nums[3], scores[3]
}