// pulse-monitor-v2/main.go (2337 lines, 61 KiB, Go)

package main
import (
"fmt"
"image"
"image/color"
"os"
"os/signal"
"strconv"
"strings"
"syscall"
"time"
"gocv.io/x/gocv"
"gopkg.in/yaml.v3"
)
// Config holds application configuration decoded from the YAML config file
// (see loadConfig).
type Config struct {
// Camera describes the video source.
Camera struct {
// RTSPURL is the stream address handed to NewCapture in main.
RTSPURL string `yaml:"rtsp_url"`
} `yaml:"camera"`
// HomeAssistant holds the optional Home Assistant endpoint; main only
// enables the integration when both URL and Token are non-empty.
HomeAssistant struct {
URL string `yaml:"url"`
Token string `yaml:"token"`
} `yaml:"home_assistant"`
// OCR holds the key for an OCR service.
// NOTE(review): APIKey is not referenced in the visible part of this file -
// confirm it is still used.
OCR struct {
APIKey string `yaml:"api_key"`
} `yaml:"ocr"`
// Processing holds loop tuning.
Processing struct {
// IntervalSeconds is defaulted to 2 by loadConfig when left at zero.
// NOTE(review): main's loop uses a hard-coded 500ms interval instead of
// this value - confirm which one is intended.
IntervalSeconds int `yaml:"interval_seconds"`
} `yaml:"processing"`
}
// init exports OPENCV_LOG_LEVEL=ERROR and GST_DEBUG=0 into the process
// environment at package start-up to quiet OpenCV and GStreamer warnings.
func init() {
	quiet := [...][2]string{
		{"OPENCV_LOG_LEVEL", "ERROR"},
		{"GST_DEBUG", "0"},
	}
	for _, kv := range quiet {
		os.Setenv(kv[0], kv[1])
	}
}
// loadConfig reads the YAML file at path and decodes it into a Config.
//
// A missing (zero) or negative processing.interval_seconds is replaced by the
// default of 2. On failure the returned error wraps the underlying read or
// parse error (so errors.Is / errors.As still work) and the *Config is nil.
func loadConfig(path string) (*Config, error) {
	data, err := os.ReadFile(path)
	if err != nil {
		return nil, fmt.Errorf("reading config: %w", err)
	}

	var cfg Config
	if err := yaml.Unmarshal(data, &cfg); err != nil {
		return nil, fmt.Errorf("parsing config %q: %w", path, err)
	}

	// Defaults: a non-positive interval is never meaningful.
	if cfg.Processing.IntervalSeconds <= 0 {
		cfg.Processing.IntervalSeconds = 2
	}
	return &cfg, nil
}
// main wires the monitor together: it loads config.yaml, opens the camera,
// loads the pattern store, optionally enables Home Assistant posting, starts
// the training web server, locates the digit bar once, and then loops reading
// frames, recognizing the left (SpO2) and right (HR) numbers, posting changed
// values and printing them to stdout until SIGINT/SIGTERM arrives.
func main() {
// Load config
cfg, err := loadConfig("config.yaml")
if err != nil {
fmt.Printf("Failed to load config: %v\n", err)
os.Exit(1)
}
// Set up components
capture, err := NewCapture(cfg.Camera.RTSPURL)
if err != nil {
fmt.Printf("Failed to connect to camera: %v\n", err)
os.Exit(1)
}
defer capture.Close()
// Load pattern store
// NOTE(review): os.Exit below skips the deferred capture.Close().
patterns, err := NewPatternStore("patterns.csv")
if err != nil {
fmt.Printf("Failed to load patterns: %v\n", err)
os.Exit(1)
}
fmt.Printf("Loaded %d patterns\n", patterns.Count())
// Initialize Home Assistant client (left nil, i.e. disabled, unless both
// URL and token are configured)
var hass *HASS
if cfg.HomeAssistant.URL != "" && cfg.HomeAssistant.Token != "" {
hass = NewHASS(cfg.HomeAssistant.URL, cfg.HomeAssistant.Token)
fmt.Println("Home Assistant integration enabled")
}
// Create training directory
// NOTE(review): the MkdirAll error is ignored.
os.MkdirAll("training", 0755)
// Start web server for training
StartWebServer(8090)
// Handle graceful shutdown
sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
fmt.Println("Starting pulse monitor...")
// Find bar region (retry until valid)
// NOTE(review): this loop does not check sigChan, so a signal received
// here is only acted on once a valid bar region has been found.
var barRegion Region
for {
frame := capture.NextFrame()
if frame.Empty() {
frame.Close()
time.Sleep(time.Second)
continue
}
prep := preprocessFrame(frame)
frame.Close()
gocv.IMWrite("bw_image.png", prep)
// findBar also writes bars_visualization.png (and bar_cropped.png on success)
barRegion = findBar(prep)
prep.Close()
fmt.Println("Saved bw_image.png and bars_visualization.png")
fmt.Printf("Bar region: (%d,%d) %dx%d\n", barRegion.X, barRegion.Y, barRegion.W, barRegion.H)
// A zero Region (W or H == 0) means findBar found no candidate
if barRegion.W > 0 && barRegion.H > 0 {
break
}
fmt.Println("Invalid bar region, retrying in 2s...")
time.Sleep(2 * time.Second)
}
// Track last known values (for display and HASS)
var lastLeft, lastRight string
var lastPostedLeft, lastPostedRight int
var prevDisplayLeft, prevDisplayRight string // for CR/LF logic
// Rate limit: 2 frames per second
// NOTE(review): cfg.Processing.IntervalSeconds is not consulted here.
lastProcess := time.Now()
processInterval := 500 * time.Millisecond
// Main loop - detect changes in left/right
for {
// Non-blocking check for a shutdown signal
select {
case <-sigChan:
fmt.Printf("\nShutting down... Patterns: %d\n", patterns.Count())
GenerateHTML("training")
return
default:
}
frame := capture.NextFrame()
if frame.Empty() {
frame.Close()
time.Sleep(100 * time.Millisecond)
continue
}
// Skip processing if not enough time has passed
// (the frame is still read and closed, just not analyzed)
if time.Since(lastProcess) < processInterval {
frame.Close()
continue
}
lastProcess = time.Now()
prep := preprocessFrame(frame)
frame.Close()
// Crop bar and find digit regions
// barRegion is the one computed once at start-up; it is not re-detected here
barCrop := CropRegion(prep, barRegion)
rows := barCrop.Rows()
// Find digit boundaries
spo2Start, spo2End, hrStart, hrEnd := visualizeColumns(barCrop)
// Crop tightly to each digit region
leftRegion := barCrop.Region(image.Rect(spo2Start, 0, spo2End, rows))
leftCrop := gocv.NewMat()
leftRegion.CopyTo(&leftCrop)
leftRegion.Close()
rightRegion := barCrop.Region(image.Rect(hrStart, 0, hrEnd, rows))
rightCrop := gocv.NewMat()
rightRegion.CopyTo(&rightCrop)
rightRegion.Close()
// The digit crops are plain assignments of the Mats above; only
// leftCrop/rightCrop are closed during cleanup
leftDigitCrop := leftCrop
rightDigitCrop := rightCrop
// Add 1px black border so contours don't touch edges (FindContours ignores border-touching shapes)
leftPadded := gocv.NewMatWithSize(leftCrop.Rows()+2, leftCrop.Cols()+2, leftCrop.Type())
rightPadded := gocv.NewMatWithSize(rightCrop.Rows()+2, rightCrop.Cols()+2, rightCrop.Type())
leftRegionPad := leftPadded.Region(image.Rect(1, 1, leftCrop.Cols()+1, leftCrop.Rows()+1))
rightRegionPad := rightPadded.Region(image.Rect(1, 1, rightCrop.Cols()+1, rightCrop.Rows()+1))
leftCrop.CopyTo(&leftRegionPad)
rightCrop.CopyTo(&rightRegionPad)
leftRegionPad.Close()
rightRegionPad.Close()
// Vectorize contours for shape comparison (RetrievalList = all contours including holes)
// NOTE(review): within this function the contours are only closed during
// cleanup and never read - confirm whether they are still needed.
leftContours := gocv.FindContours(leftPadded, gocv.RetrievalList, gocv.ChainApproxSimple)
rightContours := gocv.FindContours(rightPadded, gocv.RetrievalList, gocv.ChainApproxSimple)
leftPadded.Close()
rightPadded.Close()
// Recognize numbers using hole/stroke analysis
leftResult := recognizeNumber(leftDigitCrop)
rightResult := recognizeNumber(rightDigitCrop)
// Update values if recognized
// An empty result keeps the previous value; HASS is only called when the
// parsed integer differs from the last one posted
if leftResult != "" {
lastLeft = leftResult
if hass != nil {
if val, err := strconv.Atoi(leftResult); err == nil && val != lastPostedLeft {
hass.PostSpO2(val)
lastPostedLeft = val
}
}
}
if rightResult != "" {
lastRight = rightResult
if hass != nil {
if val, err := strconv.Atoi(rightResult); err == nil && val != lastPostedRight {
hass.PostHR(val)
lastPostedRight = val
}
}
}
// Print the reading: start a new line when a value changed, otherwise
// overwrite the current line with "\r" so only the timestamp updates
if lastLeft != "" || lastRight != "" {
timeStr := time.Now().Format("15:04:05")
valuesChanged := lastLeft != prevDisplayLeft || lastRight != prevDisplayRight
if valuesChanged {
fmt.Printf("\n%s SpO2: %-3s HR: %-3s", timeStr, lastLeft, lastRight)
prevDisplayLeft, prevDisplayRight = lastLeft, lastRight
} else {
fmt.Printf("\r%s SpO2: %-3s HR: %-3s", timeStr, lastLeft, lastRight)
}
}
// Cleanup
leftContours.Close()
rightContours.Close()
leftCrop.Close()
rightCrop.Close()
barCrop.Close()
prep.Close()
}
}
// preprocessFrame turns a raw camera frame into the black-and-white image the
// detectors work on: the top 68 rows (timestamp overlay) are dropped, the rest
// is rotated 90 degrees clockwise, converted from BGR to grayscale and
// binarized with a fixed threshold of 210 (above -> 255, otherwise 0).
// The caller owns the returned Mat and must Close it.
func preprocessFrame(frame gocv.Mat) gocv.Mat {
	const timestampRows = 68

	body := frame.Region(image.Rect(0, timestampRows, frame.Cols(), frame.Rows()))
	defer body.Close()

	turned := gocv.NewMat()
	defer turned.Close()
	gocv.Rotate(body, &turned, gocv.Rotate90Clockwise)

	mono := gocv.NewMat()
	defer mono.Close()
	gocv.CvtColor(turned, &mono, gocv.ColorBGRToGray)

	binary := gocv.NewMat()
	gocv.Threshold(mono, &binary, 210, 255, gocv.ThresholdBinary)
	return binary
}
// parseNumber concatenates every ASCII digit found in s, in order, and
// returns the result as a base-10 integer. All other characters are ignored,
// so "1a2b3" yields 123; a string without digits yields 0.
func parseNumber(s string) int {
	total := 0
	for i := 0; i < len(s); i++ {
		if b := s[i]; '0' <= b && b <= '9' {
			total = total*10 + int(b-'0')
		}
	}
	return total
}
// abs returns the magnitude of x (x itself when non-negative, -x otherwise).
func abs(x int) int {
	if x >= 0 {
		return x
	}
	return -x
}
// findBar detects the digit bar region using B&W image analysis.
//
// The frame is split into horizontal "bars": maximal runs of rows that either
// all contain at least one white pixel or all contain none. The digit bar is
// the first white bar whose start lies between 30% and 70% of the vertical
// span of white content and whose preceding recorded bar is black and at
// least 5% of that span tall. The returned Region is that bar, trimmed
// horizontally to its leftmost/rightmost white pixel. A zero Region is
// returned when no bar qualifies.
//
// Side effects: writes bars_visualization.png, writes bar_cropped.png on
// success, and prints diagnostics to stdout.
func findBar(frame gocv.Mat) Region {
// Input is B&W (0 or 255) from preprocessFrame
rows := frame.Rows()
cols := frame.Cols()
// Count white pixels per row
whiteCount := make([]int, rows)
for y := 0; y < rows; y++ {
for x := 0; x < cols; x++ {
if frame.GetUCharAt(y, x) > 0 {
whiteCount[y]++
}
}
}
// Find ALL bars - transitions between rows with/without white pixels
type Bar struct {
Start, End int
HasWhite bool
WhitePixels int
}
var bars []Bar
var transitions []int
// NOTE(review): whiteCount[0] is indexed unconditionally, so a frame with
// zero rows would panic here.
hasWhite := whiteCount[0] > 0
barStart := 0
var pixelSum int
transitions = append(transitions, 0)
for y := 0; y < rows; y++ {
rowHasWhite := whiteCount[y] > 0
// NOTE(review): the current row is added before the transition check, so a
// recorded bar's WhitePixels also includes row y, which belongs to the next
// bar (the bar itself ends at y-1).
pixelSum += whiteCount[y]
// A bar is closed when the row type flips or on the final row.
// NOTE(review): on the final row the bar is closed with End = y-1, so the
// last image row is not included in it.
if rowHasWhite != hasWhite || y == rows-1 {
height := y - barStart
// Runs shorter than 5 rows are dropped rather than merged, so consecutive
// entries in bars are not necessarily adjacent in the image
if height >= 5 {
bars = append(bars, Bar{
Start: barStart,
End: y - 1,
HasWhite: hasWhite,
WhitePixels: pixelSum,
})
transitions = append(transitions, y)
}
barStart = y
pixelSum = whiteCount[y]
hasWhite = rowHasWhite
}
}
// Save visualization: one horizontal line plus its Y value per transition
vis := gocv.NewMat()
gocv.CvtColor(frame, &vis, gocv.ColorGrayToBGR)
for _, y := range transitions {
gocv.Line(&vis, image.Point{0, y}, image.Point{cols, y}, color.RGBA{255, 0, 0, 255}, 2)
gocv.PutText(&vis, fmt.Sprintf("%d", y), image.Point{10, y - 5}, gocv.FontHersheyPlain, 1.5, color.RGBA{0, 255, 0, 255}, 2)
}
gocv.IMWrite("bars_visualization.png", vis)
vis.Close()
// Find first and last white bar to define content bounds
firstWhite, lastWhite := -1, -1
for i, b := range bars {
if b.HasWhite {
if firstWhite == -1 {
firstWhite = i
}
lastWhite = i
}
}
// Calculate vertical span of white content
// (both stay 0 when there is no white bar, making contentSpan 0)
var contentStart, contentEnd int
if firstWhite >= 0 && lastWhite >= 0 {
contentStart = bars[firstWhite].Start
contentEnd = bars[lastWhite].End
}
contentSpan := contentEnd - contentStart
// Print bars with position as percentage of white content span
// NOTE(review): with contentSpan == 0 these float divisions produce Inf/NaN
// in the printed percentages.
fmt.Println("Detected bars (% = position within white content):")
for _, b := range bars {
t := "BLACK"
if b.HasWhite {
t = "WHITE"
}
height := b.End - b.Start + 1
// Calculate position as percentage of content span
startPct := float64(b.Start-contentStart) / float64(contentSpan) * 100
endPct := float64(b.End-contentStart) / float64(contentSpan) * 100
fmt.Printf(" Y=%d-%d (%d px) %s %.0f%%-%.0f%%\n", b.Start, b.End, height, t, startPct, endPct)
}
// Find the digit bar by position within content span
// Its start must lie in 30%-70% and it needs a significant black gap above it (>=5%)
var digitBar Bar
found := false
for i, bar := range bars {
if !bar.HasWhite {
continue
}
// Calculate position as percentage of content span
startPct := float64(bar.Start-contentStart) / float64(contentSpan) * 100
endPct := float64(bar.End-contentStart) / float64(contentSpan) * 100
// Check if bar starts in the 30%-70% range (middle of content);
// endPct is only used for the log line below
if startPct < 30 || startPct > 70 {
continue
}
// Check black gap above (previous bar should be black with significant height)
// The first recorded bar (i == 0) can never be selected
if i > 0 {
prevBar := bars[i-1]
if !prevBar.HasWhite {
gapPct := float64(prevBar.End-prevBar.Start+1) / float64(contentSpan) * 100
// Need at least 5% black gap above
if gapPct >= 5 {
digitBar = bar
found = true
fmt.Printf("Selected digit bar: %.0f%%-%.0f%% (gap above: %.1f%%)\n", startPct, endPct, gapPct)
break
}
}
}
}
if !found {
return Region{}
}
// Find leftmost and rightmost white pixels in this bar
minX := cols
maxX := 0
for y := digitBar.Start; y <= digitBar.End; y++ {
for x := 0; x < cols; x++ {
if frame.GetUCharAt(y, x) > 0 {
if x < minX {
minX = x
}
if x > maxX {
maxX = x
}
}
}
}
// Save cropped bar
region := Region{
X: minX,
Y: digitBar.Start,
W: maxX - minX + 1,
H: digitBar.End - digitBar.Start + 1,
}
barCrop := CropRegion(frame, region)
gocv.IMWrite("bar_cropped.png", barCrop)
barCrop.Close()
fmt.Printf("Bar cropped: (%d,%d) %dx%d\n", region.X, region.Y, region.W, region.H)
return region
}
// parseValues pulls SpO2 and HR out of a response such as "SPO2:91 HR:117".
//
// Labels are not interpreted: every run of ASCII digits is read as a number
// and assigned purely by range, in order of appearance. The first number in
// 70..100 becomes spo2; otherwise the first number in 40..220 becomes hr.
// Runs that do not fit in an int are skipped. A value that was not found is
// returned as 0.
func parseValues(s string) (spo2, hr int) {
	notDigit := func(r rune) bool { return r < '0' || r > '9' }
	for _, run := range strings.FieldsFunc(strings.ToUpper(s), notDigit) {
		n, err := strconv.Atoi(run)
		if err != nil {
			continue
		}
		switch {
		case spo2 == 0 && n >= 70 && n <= 100:
			spo2 = n
		case hr == 0 && n >= 40 && n <= 220:
			hr = n
		}
	}
	return spo2, hr
}
// Region defines a rectangular area by its top-left corner (X, Y) and its
// width W and height H. CropRegion converts it to the image rectangle
// (X, Y)-(X+W, Y+H); main treats a Region with W or H of 0 as "not found".
type Region struct {
X, Y, W, H int
}
// ColumnType classifies a single pixel column's content, as decided by
// classifyColumn.
type ColumnType int
const (
ColEmpty ColumnType = iota // fewer than 2 white pixels in the column
ColFullHeight // at least one white pixel in the top third (treated as a digit column)
ColBottomOnly // white pixels only below the top third (treated as label text)
)
// classifyColumn inspects column x of a binary image and reports whether it
// is empty, part of a digit, or part of a label:
//
//   - ColEmpty:      the column holds fewer than 2 white pixels;
//   - ColFullHeight: at least one white pixel lies in the top third of rows;
//   - ColBottomOnly: all white pixels lie below the top third.
func classifyColumn(img gocv.Mat, x int) ColumnType {
	height := img.Rows()
	cutoff := height / 3

	var upper, lower int
	for y := 0; y < height; y++ {
		if img.GetUCharAt(y, x) == 0 {
			continue
		}
		if y < cutoff {
			upper++
		} else {
			lower++
		}
	}

	switch {
	case upper+lower < 2:
		return ColEmpty
	case upper > 0:
		// Any content in the top third marks the column as a digit column.
		return ColFullHeight
	default:
		return ColBottomOnly
	}
}
// visualizeColumns classifies every column of the bar image, groups the
// digit columns into runs and returns the x bounds of the first two runs as
// spo2Start, spo2End, hrStart, hrEnd (End values are exclusive).
//
// A run starts at a full-height column and ends once 30 empty columns have
// accumulated since the last full-height column; bottom-only (label) columns
// neither extend the crop nor count towards that gap. Each run is cropped to
// its last full-height column. With a single run the run is split at its
// midpoint; with no run the whole image is split in half.
//
// On every call the function also writes debug_columns.png and prints the
// detected runs to stdout.
func visualizeColumns(img gocv.Mat) (int, int, int, int) {
	height, width := img.Rows(), img.Cols()

	kinds := make([]ColumnType, width)
	for x := range kinds {
		kinds[x] = classifyColumn(img, x)
	}

	const minGap = 30
	type span struct{ Start, End int }
	var (
		spans      []span
		open       bool // currently inside a run
		openedAt   int  // first column of the open run
		lastDigitX int  // most recent full-height column (tight crop edge)
		blanks     int  // empty columns seen since lastDigitX
	)
	for x, kind := range kinds {
		switch {
		case kind == ColFullHeight:
			if !open {
				open, openedAt = true, x
			}
			lastDigitX, blanks = x, 0
		case open:
			if kind == ColEmpty {
				blanks++
			}
			// Only enough truly empty columns close the run.
			if blanks >= minGap {
				spans = append(spans, span{openedAt, lastDigitX + 1})
				open = false
			}
		}
	}
	if open {
		spans = append(spans, span{openedAt, lastDigitX + 1})
	}

	// Debug rendering.
	canvas := gocv.NewMat()
	gocv.CvtColor(img, &canvas, gocv.ColorGrayToBGR)
	defer canvas.Close()

	// One short tick per column at the top edge, coloured by column type.
	markers := map[ColumnType]color.RGBA{
		ColEmpty:      {R: 0, G: 0, B: 255, A: 255},
		ColFullHeight: {R: 0, G: 255, B: 0, A: 255},
		ColBottomOnly: {R: 255, G: 0, B: 0, A: 255},
	}
	for x, kind := range kinds {
		gocv.Line(&canvas, image.Pt(x, 0), image.Pt(x, 3), markers[kind], 1)
	}

	// Full-height lines at both edges of every run.
	edge := color.RGBA{R: 255, G: 255, B: 255, A: 255}
	for _, s := range spans {
		gocv.Line(&canvas, image.Pt(s.Start, 0), image.Pt(s.Start, height), edge, 1)
		gocv.Line(&canvas, image.Pt(s.End, 0), image.Pt(s.End, height), edge, 1)
	}

	// Horizontal line at half height (upper/lower half boundary).
	gocv.Line(&canvas, image.Pt(0, height/2), image.Pt(width, height/2), color.RGBA{R: 0, G: 255, B: 255, A: 255}, 1)
	gocv.IMWrite("debug_columns.png", canvas)

	fmt.Printf("Digit runs (minGap=%d): ", minGap)
	for i, s := range spans {
		fmt.Printf("Run%d[%d-%d] ", i, s.Start, s.End)
	}
	fmt.Println()

	switch len(spans) {
	case 0:
		return 0, width / 2, width / 2, width
	case 1:
		only := spans[0]
		half := (only.Start + only.End) / 2
		return only.Start, half, half, only.End
	default:
		return spans[0].Start, spans[0].End, spans[1].Start, spans[1].End
	}
}
// findDigitRegions locates the SpO2 (left) and HR (right) digit areas of a
// bar image and returns them as full-height rectangles.
//
// Columns are classified with classifyColumn; every unbroken stretch of
// full-height columns wider than one tenth of the image counts as a digit
// region. The first two regions are returned. With one region it is split at
// its midpoint; with none the image is split in half. The regions found are
// printed to stdout.
func findDigitRegions(img gocv.Mat) (spo2 image.Rectangle, hr image.Rectangle) {
	height, width := img.Rows(), img.Cols()

	kinds := make([]ColumnType, width)
	for x := range kinds {
		kinds[x] = classifyColumn(img, x)
	}

	type xRange struct{ Start, End int }
	minWidth := width / 10
	var found []xRange
	begin := -1 // start column of the stretch being scanned, -1 when outside one
	for x, kind := range kinds {
		full := kind == ColFullHeight
		switch {
		case full && begin < 0:
			begin = x
		case !full && begin >= 0:
			if x-begin > minWidth {
				found = append(found, xRange{begin, x})
			}
			begin = -1
		}
	}
	// A stretch may run to the right edge of the image.
	if begin >= 0 && width-begin > minWidth {
		found = append(found, xRange{begin, width})
	}

	fmt.Printf("Found %d digit regions: ", len(found))
	for _, r := range found {
		pctStart := float64(r.Start) * 100 / float64(width)
		pctEnd := float64(r.End) * 100 / float64(width)
		fmt.Printf("[%d-%d (%.0f%%-%.0f%%)] ", r.Start, r.End, pctStart, pctEnd)
	}
	fmt.Println()

	switch len(found) {
	case 0:
		return image.Rect(0, 0, width/2, height), image.Rect(width/2, 0, width, height)
	case 1:
		half := (found[0].Start + found[0].End) / 2
		return image.Rect(found[0].Start, 0, half, height), image.Rect(half, 0, found[0].End, height)
	default:
		return image.Rect(found[0].Start, 0, found[0].End, height), image.Rect(found[1].Start, 0, found[1].End, height)
	}
}
// CropRegion returns a copy of the part of mat covered by r, i.e. the
// rectangle (r.X, r.Y)-(r.X+r.W, r.Y+r.H). The copy is made with CopyTo into
// a fresh Mat, which the caller must Close.
func CropRegion(mat gocv.Mat, r Region) gocv.Mat {
	view := mat.Region(image.Rect(r.X, r.Y, r.X+r.W, r.Y+r.H))
	defer view.Close()

	out := gocv.NewMat()
	view.CopyTo(&out)
	return out
}
// DigitPattern holds a contour's direction pattern and position.
// extractDigitPatterns builds one per accepted digit contour and orders them
// by X to read digits left to right.
type DigitPattern struct {
X int // leftmost X of the contour's bounding box, used for sorting
Pattern string // direction chain: the joined output of pointsToDirections
}
// extractDigitPatterns turns contours into per-digit direction patterns,
// ordered left to right, optionally followed by one hole descriptor.
//
// Parameters:
//   - contours: contours found in the (padded) digit crop.
//   - imgHeight, imgWidth: size of that crop; all thresholds are relative to it.
//   - filterRightEdge: when true, contours starting at or beyond 80% of the
//     width are never treated as digits (right-side noise).
//
// A contour is a digit when its bounding box is at least 25% of the height
// and 15% of the width and does not start below 60% of the height. A
// non-digit contour at least 10% of the height in both dimensions and
// starting above 60% of the height is a hole candidate; it is kept only if
// its center lies inside the combined bounding box of all digits.
//
// Returns the digit patterns followed, when holes exist, by "h" plus each
// hole's horizontal position in tenths of the digit width, ascending (e.g.
// "h35" for holes at 30% and 50%). Returns nil when more than two holes are
// found, since that is likely an icon rather than a number.
func extractDigitPatterns(contours gocv.PointsVector, imgHeight, imgWidth int, filterRightEdge bool) []string {
	var (
		patterns    []DigitPattern
		holes       []image.Rectangle // bounding boxes of potential holes
		digitBounds image.Rectangle   // combined bounding box of all digits
	)

	// Size thresholds.
	minDigitHeight := imgHeight * 25 / 100
	minDigitWidth := imgWidth * 15 / 100
	minHoleSize := imgHeight * 10 / 100

	// First pass: find digits and potential holes.
	for i := 0; i < contours.Size(); i++ {
		contour := contours.At(i)
		rect := gocv.BoundingRect(contour)

		isDigit := rect.Dy() >= minDigitHeight && rect.Dx() >= minDigitWidth
		if filterRightEdge && rect.Min.X >= imgWidth*80/100 {
			isDigit = false
		}
		isHole := rect.Dy() >= minHoleSize && rect.Dx() >= minHoleSize && rect.Min.Y < imgHeight*60/100
		if !isDigit {
			if isHole {
				holes = append(holes, rect)
			}
			continue
		}
		// Contours that start too low are labels, which sit at the bottom.
		if rect.Min.Y > imgHeight*60/100 {
			continue
		}
		// Union returns rect unchanged while digitBounds is still empty.
		digitBounds = digitBounds.Union(rect)

		// Simplify the outline, then describe it as a chain of directions.
		epsilon := 0.01 * gocv.ArcLength(contour, true)
		approx := gocv.ApproxPolyDP(contour, epsilon, true)
		pts := approx.ToPoints()
		// ToPoints copied the data; release the native vector now so it does
		// not leak on every call.
		approx.Close()
		if len(pts) > 2 {
			if pattern := strings.Join(pointsToDirections(pts), ""); pattern != "" {
				patterns = append(patterns, DigitPattern{X: rect.Min.X, Pattern: pattern})
			}
		}
	}

	// Order digits left to right (same exchange sort as before, so contours
	// sharing an X keep their previous relative order).
	for i := 0; i < len(patterns)-1; i++ {
		for j := i + 1; j < len(patterns); j++ {
			if patterns[j].X < patterns[i].X {
				patterns[i], patterns[j] = patterns[j], patterns[i]
			}
		}
	}
	result := make([]string, len(patterns))
	for i, p := range patterns {
		result[i] = p.Pattern
	}

	// Keep only holes whose center lies inside the digit bounding box. With
	// no digit at all the box has zero width; skip the holes in that case
	// instead of dividing by zero below.
	var holePositions []int
	if digitWidth := digitBounds.Dx(); digitWidth > 0 {
		for _, hole := range holes {
			centerX := hole.Min.X + hole.Dx()/2
			centerY := hole.Min.Y + hole.Dy()/2
			inside := centerX >= digitBounds.Min.X && centerX <= digitBounds.Max.X &&
				centerY >= digitBounds.Min.Y && centerY <= digitBounds.Max.Y
			if !inside {
				continue
			}
			// Horizontal position in tenths of the digit width, capped at 9.
			pct := (centerX - digitBounds.Min.X) * 10 / digitWidth
			if pct > 9 {
				pct = 9
			}
			holePositions = append(holePositions, pct)
		}
	}

	// More than two holes is likely an alarm bell or other icon, not a digit.
	if len(holePositions) > 2 {
		return nil
	}
	if len(holePositions) > 0 {
		for i := 0; i < len(holePositions)-1; i++ {
			for j := i + 1; j < len(holePositions); j++ {
				if holePositions[j] < holePositions[i] {
					holePositions[i], holePositions[j] = holePositions[j], holePositions[i]
				}
			}
		}
		var h strings.Builder
		h.WriteString("h")
		for _, p := range holePositions {
			h.WriteString(strconv.Itoa(p))
		}
		result = append(result, h.String())
	}
	return result
}
// pointsToDirections walks a closed polygon (the last point connects back to
// the first) and returns the direction code of each edge as produced by
// getDirection. Edges shorter than 10 pixels on both axes are ignored, and a
// direction equal to the previously emitted one is not repeated.
func pointsToDirections(pts []image.Point) []string {
	const minDist = 10 // minimum edge extent on at least one axis

	n := len(pts)
	out := make([]string, 0, n)
	prev := ""
	for i, from := range pts {
		step := pts[(i+1)%n].Sub(from) // wraps to the first point
		if abs(step.X) < minDist && abs(step.Y) < minDist {
			continue
		}
		if d := getDirection(step.X, step.Y); d != prev {
			out = append(out, d)
			prev = d
		}
	}
	return out
}
// getDirection maps a displacement (dx, dy) to one of four cardinal codes,
// using image coordinates (y grows downward):
//
//	"1" = N (dy < 0), "2" = E (dx > 0), "3" = S (dy > 0), "4" = W (dx < 0)
//
// The axis with the larger magnitude wins; ties go to the horizontal axis.
// A zero displacement yields "".
func getDirection(dx, dy int) string {
	switch {
	case dx == 0 && dy == 0:
		return ""
	case abs(dx) >= abs(dy):
		// Horizontal dominant.
		if dx > 0 {
			return "2"
		}
		return "4"
	case dy > 0:
		return "3"
	default:
		return "1"
	}
}
/* OLD 8-direction version (keep for easy revert):
// getDirection returns direction as digit 1-8 (clockwise from N)
// 1=N, 2=NE, 3=E, 4=SE, 5=S, 6=SW, 7=W, 8=NW
func getDirection8(dx, dy int) string {
if dx == 0 && dy == 0 {
return ""
}
adx, ady := abs(dx), abs(dy)
// Cardinal directions (one axis dominates >2x)
if adx > ady*2 {
if dx > 0 {
return "3" // E
}
return "7" // W
}
if ady > adx*2 {
if dy > 0 {
return "5" // S
}
return "1" // N
}
// Diagonal
if dx > 0 && dy > 0 {
return "4" // SE
}
if dx > 0 && dy < 0 {
return "2" // NE
}
if dx < 0 && dy > 0 {
return "6" // SW
}
return "8" // NW
}
*/
// zhangSuenThinning performs skeletonization using the Zhang-Suen algorithm.
// Input: binary image; pixels with value > 127 are foreground, the rest background.
// Returns: a new single-channel 8-bit Mat of the same size holding the thinned
// image (foreground = 255). The input is not modified; the caller must Close
// the result.
func zhangSuenThinning(img gocv.Mat) gocv.Mat {
rows, cols := img.Rows(), img.Cols()
// Convert to binary array (0/1) for easier manipulation
data := make([][]byte, rows)
for y := 0; y < rows; y++ {
data[y] = make([]byte, cols)
for x := 0; x < cols; x++ {
if img.GetUCharAt(y, x) > 127 {
data[y][x] = 1
}
}
}
// Neighbor offsets (P2,P3,P4,P5,P6,P7,P8,P9 clockwise from top)
// P9 P2 P3
// P8 P1 P4
// P7 P6 P5
// Index i maps to P(i+2): 0=P2 (top), 2=P4 (right), 4=P6 (bottom), 6=P8 (left)
dy := []int{-1, -1, 0, 1, 1, 1, 0, -1}
dx := []int{0, 1, 1, 1, 0, -1, -1, -1}
// getP returns neighbor i of (y, x); anything outside the image counts as background
getP := func(y, x, i int) byte {
ny, nx := y+dy[i], x+dx[i]
if ny < 0 || ny >= rows || nx < 0 || nx >= cols {
return 0
}
return data[ny][nx]
}
// Count 0->1 transitions in clockwise order (P2,P3,...,P9,P2)
transitions := func(y, x int) int {
count := 0
for i := 0; i < 8; i++ {
if getP(y, x, i) == 0 && getP(y, x, (i+1)%8) == 1 {
count++
}
}
return count
}
// Count non-zero neighbors
neighbors := func(y, x int) int {
count := 0
for i := 0; i < 8; i++ {
count += int(getP(y, x, i))
}
return count
}
// Repeat both sub-iterations until a full pass removes nothing
changed := true
for changed {
changed = false
// Sub-iteration 1
// Candidates are collected first and cleared afterwards, so every test in
// this pass sees the same image. Pixels on the outermost rows/columns are
// never examined.
var toRemove []image.Point
for y := 1; y < rows-1; y++ {
for x := 1; x < cols-1; x++ {
if data[y][x] != 1 {
continue
}
// Must have between 2 and 6 foreground neighbors
n := neighbors(y, x)
if n < 2 || n > 6 {
continue
}
// Must have exactly one 0->1 transition around it
if transitions(y, x) != 1 {
continue
}
// P2 * P4 * P6 == 0
if getP(y, x, 0)*getP(y, x, 2)*getP(y, x, 4) != 0 {
continue
}
// P4 * P6 * P8 == 0
if getP(y, x, 2)*getP(y, x, 4)*getP(y, x, 6) != 0 {
continue
}
toRemove = append(toRemove, image.Point{x, y})
}
}
for _, p := range toRemove {
data[p.Y][p.X] = 0
changed = true
}
// Sub-iteration 2 (same as above with the mirrored pair of conditions)
toRemove = nil
for y := 1; y < rows-1; y++ {
for x := 1; x < cols-1; x++ {
if data[y][x] != 1 {
continue
}
n := neighbors(y, x)
if n < 2 || n > 6 {
continue
}
if transitions(y, x) != 1 {
continue
}
// P2 * P4 * P8 == 0
if getP(y, x, 0)*getP(y, x, 2)*getP(y, x, 6) != 0 {
continue
}
// P2 * P6 * P8 == 0
if getP(y, x, 0)*getP(y, x, 4)*getP(y, x, 6) != 0 {
continue
}
toRemove = append(toRemove, image.Point{x, y})
}
}
for _, p := range toRemove {
data[p.Y][p.X] = 0
changed = true
}
}
// Convert back to Mat (only foreground pixels are written, as 255)
result := gocv.NewMatWithSize(rows, cols, gocv.MatTypeCV8U)
for y := 0; y < rows; y++ {
for x := 0; x < cols; x++ {
if data[y][x] == 1 {
result.SetUCharAt(y, x, 255)
}
}
}
return result
}
// traceSkeletonStrokes traces the skeleton and returns stroke directions
// as a string over the letters R, D, L, U (Right, Down, Left, Up), e.g. "RDLU".
// A letter is emitted each time the trace has moved at least 20 pixels on one
// axis from the last checkpoint; a letter equal to the previously emitted one
// is not repeated. Returns "" for an empty skeleton or a trace of fewer than
// 2 pixels.
func traceSkeletonStrokes(skeleton gocv.Mat) string {
rows, cols := skeleton.Rows(), skeleton.Cols()
// Find all skeleton pixels (row-major order, so points[0] is the topmost-leftmost)
var points []image.Point
for y := 0; y < rows; y++ {
for x := 0; x < cols; x++ {
if skeleton.GetUCharAt(y, x) > 0 {
points = append(points, image.Point{x, y})
}
}
}
if len(points) == 0 {
return ""
}
// Build adjacency - 8-connected neighbors
isSet := func(x, y int) bool {
if x < 0 || x >= cols || y < 0 || y >= rows {
return false
}
return skeleton.GetUCharAt(y, x) > 0
}
// neighbors lists the set pixels around p, scanned row by row from top-left
neighbors := func(p image.Point) []image.Point {
var n []image.Point
for dy := -1; dy <= 1; dy++ {
for dx := -1; dx <= 1; dx++ {
if dx == 0 && dy == 0 {
continue
}
if isSet(p.X+dx, p.Y+dy) {
n = append(n, image.Point{p.X + dx, p.Y + dy})
}
}
}
return n
}
// Find endpoint (pixel with exactly 1 neighbor) - prefer topmost, then leftmost
var startPoint image.Point
foundEndpoint := false
for _, p := range points {
if len(neighbors(p)) == 1 {
if !foundEndpoint || p.Y < startPoint.Y || (p.Y == startPoint.Y && p.X < startPoint.X) {
startPoint = p
foundEndpoint = true
}
}
}
// If no endpoint (closed loop), start from topmost-leftmost point
if !foundEndpoint {
startPoint = points[0]
for _, p := range points {
if p.Y < startPoint.Y || (p.Y == startPoint.Y && p.X < startPoint.X) {
startPoint = p
}
}
}
// Trace the skeleton from startPoint
// Recursive depth-first walk: path records pixels in visiting order. Only
// pixels connected to startPoint are reached. After a branch is exhausted
// the next path entry can be far from the previous one.
visited := make(map[image.Point]bool)
var path []image.Point
var trace func(p image.Point)
trace = func(p image.Point) {
if visited[p] {
return
}
visited[p] = true
path = append(path, p)
for _, n := range neighbors(p) {
if !visited[n] {
trace(n)
}
}
}
trace(startPoint)
// Convert path to directions - only emit when moved `threshold` (20) or more
// pixels from checkpoint
if len(path) < 2 {
return ""
}
const threshold = 20
var result strings.Builder
var lastDir string
checkX, checkY := path[0].X, path[0].Y
for _, p := range path {
dx := p.X - checkX
dy := p.Y - checkY
// Determine dominant direction if we've moved enough
var dir string
if abs(dx) >= threshold || abs(dy) >= threshold {
// Pick the dominant direction (ties go to the horizontal axis)
if abs(dy) > abs(dx) {
if dy > 0 {
dir = "D"
} else {
dir = "U"
}
} else {
if dx > 0 {
dir = "R"
} else {
dir = "L"
}
}
// Reset checkpoint to current position
checkX, checkY = p.X, p.Y
}
// Emit only on a change of direction
if dir != "" && dir != lastDir {
result.WriteString(dir)
lastDir = dir
}
}
return result.String()
}
// analyzeHalf analyzes a half of a digit skeleton and returns shape descriptors for left/middle/right
// Returns a 3-char string like "|-|" (vertical, horizontal, vertical) or "|^|" (vertical, arch, vertical)
// Symbols: '.' fewer than 2 pixels, '|' vertical stroke, '-' horizontal stroke,
// '^' arch and 'u' bowl (middle section only), 'o' anything else.
// Returns "..." when the input has fewer than 3 rows or columns.
func analyzeHalf(skeleton gocv.Mat) string {
rows, cols := skeleton.Rows(), skeleton.Cols()
if rows < 3 || cols < 3 {
return "..."
}
// Divide into left/middle/right thirds
// (left = [0, third), middle = [third, cols-third), right = [cols-third, cols))
third := cols / 3
// Count pixels in each vertical slice and their distribution
// analyzeThird classifies the columns [startX, endX)
analyzeThird := func(startX, endX int) rune {
var pixels []image.Point
for y := 0; y < rows; y++ {
for x := startX; x < endX; x++ {
if skeleton.GetUCharAt(y, x) > 0 {
pixels = append(pixels, image.Point{x, y})
}
}
}
if len(pixels) < 2 {
return '.' // empty
}
// Find Y span and X span
minY, maxY := rows, 0
minX, maxX := cols, 0
for _, p := range pixels {
if p.Y < minY { minY = p.Y }
if p.Y > maxY { maxY = p.Y }
if p.X < minX { minX = p.X }
if p.X > maxX { maxX = p.X }
}
ySpan := maxY - minY
xSpan := maxX - minX
// Vertical stroke: tall and narrow
// (taller than half the rows, narrower than a quarter of the full width)
if ySpan > rows/2 && xSpan < cols/4 {
return '|'
}
// Horizontal stroke: wide and short
// (wider than half the slice, shorter than a third of the rows)
if xSpan > (endX-startX)/2 && ySpan < rows/3 {
return '-'
}
// Has content but not clearly vertical or horizontal
return 'o' // curve/other
}
// For middle section, also detect arch vs bowl
analyzeMiddle := func() rune {
startX, endX := third, cols-third
var pixels []image.Point
for y := 0; y < rows; y++ {
for x := startX; x < endX; x++ {
if skeleton.GetUCharAt(y, x) > 0 {
pixels = append(pixels, image.Point{x, y})
}
}
}
if len(pixels) < 2 {
return '.' // empty
}
// Check if pixels are concentrated at top (arch) or bottom (bowl)
topCount, bottomCount := 0, 0
midY := rows / 2
for _, p := range pixels {
if p.Y < midY {
topCount++
} else {
bottomCount++
}
}
// Find horizontal and vertical span
minX, maxX := cols, 0
minY, maxY := rows, 0
for _, p := range pixels {
if p.X < minX { minX = p.X }
if p.X > maxX { maxX = p.X }
if p.Y < minY { minY = p.Y }
if p.Y > maxY { maxY = p.Y }
}
xSpan := maxX - minX
ySpan := maxY - minY
// Horizontal line (checked first, so a flat line is never reported as arch/bowl)
if xSpan > (endX-startX)/2 && ySpan < rows/3 {
return '-'
}
// Arch (more than twice as many pixels in the top half, spans over a third of the section)
if topCount > bottomCount*2 && xSpan > (endX-startX)/3 {
return '^'
}
// Bowl (more than twice as many pixels in the bottom half, spans over a third of the section)
if bottomCount > topCount*2 && xSpan > (endX-startX)/3 {
return 'u'
}
return 'o' // other/curve
}
left := analyzeThird(0, third)
middle := analyzeMiddle()
right := analyzeThird(cols-third, cols)
return string([]rune{left, middle, right})
}
// classifyDigit fingerprints a digit skeleton as "<top>/<bottom>", where each
// side is the three-character descriptor analyzeHalf produces for the upper
// and lower half of the image (split at rows/2). Skeletons with fewer than 6
// rows yield "".
func classifyDigit(skeleton gocv.Mat) string {
	height := skeleton.Rows()
	if height < 6 {
		return ""
	}
	width, split := skeleton.Cols(), height/2

	upper := skeleton.Region(image.Rect(0, 0, width, split))
	defer upper.Close()
	lower := skeleton.Region(image.Rect(0, split, width, height))
	defer lower.Close()

	return analyzeHalf(upper) + "/" + analyzeHalf(lower)
}
// isVerticalStroke reports whether the skeleton looks like one plain vertical
// stroke (used to detect a "1"): it needs at least 5 rows and 3 set pixels,
// and the set pixels must span more than half the rows vertically while
// spanning less than a third of the columns horizontally.
func isVerticalStroke(skeleton gocv.Mat) bool {
	height, width := skeleton.Rows(), skeleton.Cols()
	if height < 5 {
		return false
	}

	// Single pass: count set pixels and track their bounding extremes.
	lit := 0
	left, right := width, 0
	top, bottom := height, 0
	for y := 0; y < height; y++ {
		for x := 0; x < width; x++ {
			if skeleton.GetUCharAt(y, x) == 0 {
				continue
			}
			lit++
			if x < left {
				left = x
			}
			if x > right {
				right = x
			}
			if y < top {
				top = y
			}
			if y > bottom {
				bottom = y
			}
		}
	}
	if lit < 3 {
		return false
	}

	tall := bottom-top > height/2
	narrow := right-left < width/3
	return tall && narrow
}
// getDigitBounds returns the bounding rectangle of every connected component
// that has at least one pixel in the top 20% of the image; those components
// are taken to be the digits, as opposed to noise or labels lower down. The
// box is grown by 3 pixels per side and clamped to the image. When no
// component reaches the top band the full image rectangle is returned.
func getDigitBounds(img gocv.Mat) image.Rectangle {
	height, width := img.Rows(), img.Cols()
	topBand := height * 20 / 100

	// Label connected components on the raw image.
	labels := gocv.NewMat()
	count := gocv.ConnectedComponents(img, &labels)
	defer labels.Close()

	// Mark every label that shows up inside the top band (label 0 is skipped).
	tall := make([]bool, count)
	for y := 0; y < topBand; y++ {
		for x := 0; x < width; x++ {
			if id := int(labels.GetIntAt(y, x)); id > 0 {
				tall[id] = true
			}
		}
	}

	// Accumulate the extremes of all marked components.
	left, top := width, height
	right, bottom := 0, 0
	any := false
	for y := 0; y < height; y++ {
		for x := 0; x < width; x++ {
			id := int(labels.GetIntAt(y, x))
			if id <= 0 || !tall[id] {
				continue
			}
			any = true
			if x < left {
				left = x
			}
			if x > right {
				right = x
			}
			if y < top {
				top = y
			}
			if y > bottom {
				bottom = y
			}
		}
	}
	if !any {
		return image.Rect(0, 0, width, height) // fallback to full image
	}

	// Pad so strokes on the edge are not cut off.
	const pad = 3
	return image.Rect(
		max(0, left-pad),
		max(0, top-pad),
		min(width, right+pad),
		min(height, bottom+pad),
	)
}
// max returns the larger of a and b.
func max(a, b int) int {
	if b > a {
		return b
	}
	return a
}
// HoleInfo describes one enclosed region (hole) detected inside a shape.
// All values are in pixels of the analyzed image.
type HoleInfo struct {
	CenterX int // X coordinate of the hole's center
	CenterY int // Y coordinate of the hole's center
	Area    int // number of pixels in the hole
	BoundsW int // width of the hole's bounding box
	BoundsH int // height of the hole's bounding box
}
// findHoles finds enclosed regions (holes) in img and returns their properties.
//
// The image is inverted so that dark areas become foreground, connected
// components are labeled, and every component that touches the image border
// is discarded as outer background. Remaining components with fewer than 4
// pixels are ignored as specks. Holes are returned in ascending label order.
//
// Returns nil when the image is smaller than 3x3 or no hole is found.
//
// All per-component statistics are gathered in a single pass over the label
// image rather than one full rescan per component; every label lookup goes
// through the gocv binding, so the rescans were the dominant cost.
func findHoles(img gocv.Mat) []HoleInfo {
	rows, cols := img.Rows(), img.Cols()
	if rows < 3 || cols < 3 {
		return nil
	}

	// Invert image (holes become white).
	inverted := gocv.NewMat()
	defer inverted.Close()
	gocv.BitwiseNot(img, &inverted)

	// Find connected components; label 0 is the (inverted) background.
	labels := gocv.NewMat()
	defer labels.Close()
	numLabels := gocv.ConnectedComponents(inverted, &labels)
	if numLabels <= 1 {
		return nil
	}

	// Labels that touch the border are outer background, not holes.
	touchesBorder := make([]bool, numLabels)
	for x := 0; x < cols; x++ {
		touchesBorder[int(labels.GetIntAt(0, x))] = true
		touchesBorder[int(labels.GetIntAt(rows-1, x))] = true
	}
	for y := 0; y < rows; y++ {
		touchesBorder[int(labels.GetIntAt(y, 0))] = true
		touchesBorder[int(labels.GetIntAt(y, cols-1))] = true
	}

	// Accumulate bounds, coordinate sums and pixel count for every candidate
	// label in one sweep.
	type regionStats struct {
		minX, maxX, minY, maxY int
		sumX, sumY, count      int
	}
	stats := make([]regionStats, numLabels)
	for i := range stats {
		stats[i].minX, stats[i].minY = cols, rows
	}
	for y := 0; y < rows; y++ {
		for x := 0; x < cols; x++ {
			label := int(labels.GetIntAt(y, x))
			if label == 0 || touchesBorder[label] {
				continue
			}
			s := &stats[label]
			if x < s.minX {
				s.minX = x
			}
			if x > s.maxX {
				s.maxX = x
			}
			if y < s.minY {
				s.minY = y
			}
			if y > s.maxY {
				s.maxY = y
			}
			s.sumX += x
			s.sumY += y
			s.count++
		}
	}

	var holes []HoleInfo
	for label := 1; label < numLabels; label++ {
		s := stats[label]
		if touchesBorder[label] || s.count < 4 { // skip background and tiny specks
			continue
		}
		holes = append(holes, HoleInfo{
			CenterX: s.sumX / s.count,
			CenterY: s.sumY / s.count,
			Area:    s.count,
			BoundsW: s.maxX - s.minX + 1,
			BoundsH: s.maxY - s.minY + 1,
		})
	}
	return holes
}
// WhiteRun is a contiguous stretch of white pixels on one scan line.
// Start is the first column of the run; End is the column just past it.
type WhiteRun struct {
	Start int
	End   int
}

// Width returns the number of columns between Start and End.
func (wr WhiteRun) Width() int {
	return wr.End - wr.Start
}

// Mid returns the integer average of Start and End.
func (wr WhiteRun) Mid() int {
	return (wr.Start + wr.End) / 2
}
// findWhiteRuns scans row y of img from left to right and returns the runs of
// white pixels (value > 128) in order. Each run is half-open: [Start, End).
//
// Runs separated by a dark gap of fewer than 3 pixels are merged into one,
// since such gaps are likely rendering artifacts. The result is nil when the
// row contains no white pixel. y is not range-checked here.
func findWhiteRuns(img gocv.Mat, y int) []WhiteRun {
	const mergeGap = 3 // gaps narrower than this do not separate runs
	width := img.Cols()

	var runs []WhiteRun
	// emit records a finished run, folding it into the previous one when the
	// dark gap between them is too small to count.
	emit := func(start, end int) {
		if n := len(runs); n > 0 && start-runs[n-1].End < mergeGap {
			runs[n-1].End = end
			return
		}
		runs = append(runs, WhiteRun{Start: start, End: end})
	}

	open := -1 // start column of the run in progress, or -1 when in a dark stretch
	for x := 0; x < width; x++ {
		if img.GetUCharAt(y, x) > 128 {
			if open < 0 {
				open = x
			}
			continue
		}
		if open >= 0 {
			emit(open, x)
			open = -1
		}
	}
	// A run that reaches the right edge ends at the image width.
	if open >= 0 {
		emit(open, width)
	}
	return runs
}
// recognizeNumber reads a two- or three-digit number from img using two
// horizontal scan lines, y2 at 2/5 and y4 at 4/5 of the image height.
// Pixels with value > 128 count as white.
//
// Steps:
//  1. Collect the white runs on both scan lines; their overall extent is the
//     content range [contentStart, contentEnd).
//  2. Decide whether the number starts with a "1" (three digits) using three
//     strategies, tried in order. Without a leading "1" it is two digits.
//  3. Cut the content range into digit columns and clip the runs to each one.
//  4. Fingerprint every non-"1" column as "<p2>_<p4>", the white coverage on
//     each scan line rounded to the nearest 5%, and resolve it through
//     patternStore, falling back to classifyByFingerprint.
//
// It returns the digits as a string, or "" when the image is smaller than
// 10 rows x 5 columns, when neither scan line contains white pixels, when a
// digit column is empty, or when any digit cannot be classified.
//
// Side effects: prints debug lines to stdout; writes debug_cuts_<side>.png;
// for each digit missing from patternStore, writes
// digit_<side>_<index>_<fingerprint>.png and queues it via AddUnlabeled.
// <side> is "right" when the image is wider than 100 columns, else "left".
func recognizeNumber(img gocv.Mat) string {
	rows, cols := img.Rows(), img.Cols()
	if rows < 10 || cols < 5 {
		return ""
	}
	y2 := rows * 2 / 5 // upper scan line
	y4 := rows * 4 / 5 // lower scan line
	runs2 := findWhiteRuns(img, y2)
	runs4 := findWhiteRuns(img, y4)

	// Debug
	fmt.Printf(" y=%d: ", y2)
	for _, r := range runs2 {
		fmt.Printf("[%d-%d](%d) ", r.Start, r.End, r.Width())
	}
	fmt.Printf("\n y=%d: ", y4)
	for _, r := range runs4 {
		fmt.Printf("[%d-%d](%d) ", r.Start, r.End, r.Width())
	}
	fmt.Println()

	// Nothing white on either scan line: there is no content to split.
	// Continuing would produce inverted bounds and an all-zero fingerprint
	// that the fallback classifier resolves to a digit.
	if len(runs2) == 0 && len(runs4) == 0 {
		return ""
	}

	// Content bounds: leftmost run start and rightmost run end on either line.
	contentStart, contentEnd := cols, 0
	for _, runs := range [][]WhiteRun{runs2, runs4} {
		for _, r := range runs {
			if r.Start < contentStart {
				contentStart = r.Start
			}
			if r.End > contentEnd {
				contentEnd = r.End
			}
		}
	}
	totalWidth := contentEnd - contentStart

	// Digit is one column of the image together with the scan-line runs that
	// fall inside it (run coordinates are relative to Start).
	type Digit struct {
		Start, End   int
		IsOne        bool
		Runs2, Runs4 []WhiteRun
	}

	// SIMPLIFIED APPROACH: numbers are either 2 digits OR 3 digits starting
	// with "1". Only the leading digit is ever marked IsOne; any other "1" is
	// left to the fingerprint classification.
	hasLeadingOne := false
	oneEndPos := contentStart

	// Strategy 1: within the first 30% of the content, look for a narrow
	// region that is white on BOTH scan lines, followed by a gap that is dark
	// on both. Only the first such white region is examined.
	scanLimit := contentStart + totalWidth*3/10
	inWhite := false
	whiteStart := 0
	for x := contentStart; x < scanLimit; x++ {
		isWhite := img.GetUCharAt(y2, x) > 128 && img.GetUCharAt(y4, x) > 128
		if isWhite && !inWhite {
			whiteStart = x
			inWhite = true
		} else if !isWhite && inWhite {
			whiteWidth := x - whiteStart
			if whiteWidth > 0 && whiteWidth < totalWidth/4 {
				minGapWidth := totalWidth / 20
				trueGapWidth := 0
				for x2 := x; x2 < contentEnd; x2++ {
					bothDark := img.GetUCharAt(y2, x2) <= 128 && img.GetUCharAt(y4, x2) <= 128
					if !bothDark {
						break
					}
					trueGapWidth++
				}
				if trueGapWidth >= minGapWidth {
					hasLeadingOne = true
					oneEndPos = x
				}
			}
			break
		}
	}

	// Strategy 2: the "1" merges with the next digit on y2 but is separate on
	// y4. Require the first y4 run to be narrow (< 20% of the content) and to
	// end within the first third, and the first y2 run to be narrow as well
	// (< 30%): a "9" has a narrow stem on y4 but a wide top on y2.
	if !hasLeadingOne && len(runs4) >= 2 && len(runs2) >= 1 {
		firstRun4 := runs4[0]
		firstRunWidth4 := firstRun4.Width()
		gapAfterFirst := runs4[1].Start - firstRun4.End
		firstRunWidth2 := runs2[0].Width()
		if firstRunWidth4 < totalWidth/5 && firstRun4.End < contentStart+totalWidth/3 {
			if firstRunWidth2 < totalWidth*3/10 {
				if gapAfterFirst > totalWidth/30 || len(runs4) >= 3 {
					hasLeadingOne = true
					// The second digit begins where the second y4 run starts.
					oneEndPos = runs4[1].Start
				}
			}
		}
	}

	// Strategy 3: the "1" and the following digit merge on y4 (e.g. "104").
	// Indicators: wide content (> 100px), 3+ runs on y2, and a first y4 run
	// covering 40-65% of the content, which is wider than one digit of a
	// two-digit number (~50% each would not leave 3+ runs on y2).
	if !hasLeadingOne && totalWidth > 100 && len(runs2) >= 3 && len(runs4) >= 2 {
		firstRunWidth := runs4[0].Width()
		if firstRunWidth > totalWidth*40/100 && firstRunWidth < totalWidth*65/100 {
			hasLeadingOne = true
			// Give the merged "1" the first third of the content.
			oneEndPos = contentStart + totalWidth/3
		}
	}

	// Build the digit columns.
	var digits []Digit
	if hasLeadingOne {
		// 3 digits: "1" + 2 more digits.
		digits = append(digits, Digit{Start: contentStart, End: oneEndPos, IsOne: true})
		secondEnd := (oneEndPos + contentEnd) / 2 // fallback: split the rest evenly
		if len(runs4) >= 3 {
			// One y4 run per digit: digit 2 ends where the third run begins.
			secondEnd = runs4[2].Start
		} else if len(runs4) == 2 {
			// Strategy 3 case: the first y4 run is the merged "1"+digit 2, so
			// the last run's start is where digit 3 begins. Only trust it
			// when it lies past 55% of the content.
			lastRunStart := runs4[1].Start
			if lastRunStart > contentStart+totalWidth*55/100 {
				secondEnd = lastRunStart
			}
		}
		digits = append(digits, Digit{Start: oneEndPos, End: secondEnd})
		digits = append(digits, Digit{Start: secondEnd, End: contentEnd})
	} else if len(runs4) >= 2 {
		// 2 digits: split at the LARGEST y4 gap whose midpoint lies in the
		// middle region (30-70% of the content). Gaps outside that range are
		// likely artifacts. Fall back to the center when none qualifies.
		maxGap := 0
		splitPos := (contentStart + contentEnd) / 2
		minSplit := contentStart + totalWidth*30/100
		maxSplit := contentStart + totalWidth*70/100
		for i := 1; i < len(runs4); i++ {
			gapMid := (runs4[i-1].End + runs4[i].Start) / 2
			if gapMid < minSplit || gapMid > maxSplit {
				continue
			}
			if gap := runs4[i].Start - runs4[i-1].End; gap > maxGap {
				maxGap = gap
				splitPos = gapMid
			}
		}
		digits = append(digits, Digit{Start: contentStart, End: splitPos})
		digits = append(digits, Digit{Start: splitPos, End: contentEnd})
	} else {
		// 2 digits, no usable gap: equal split.
		mid := (contentStart + contentEnd) / 2
		digits = append(digits, Digit{Start: contentStart, End: mid})
		digits = append(digits, Digit{Start: mid, End: contentEnd})
	}

	// Assign runs to digits, CLIPPED to the digit boundaries so that a run
	// spanning several digits contributes to each, and converted to
	// coordinates relative to the digit start. Clipped pieces narrower than
	// the minimum are dropped: 8px on y2 (avoid boundary artifacts), 4px on
	// y4 (keep narrow hole edges such as the right side of a "0").
	const (
		minRunWidth2 = 8
		minRunWidth4 = 4
	)
	for i := range digits {
		d := &digits[i]
		for _, r := range runs2 {
			overlapStart := max(r.Start, d.Start)
			overlapEnd := min(r.End, d.End)
			if overlapEnd-overlapStart >= minRunWidth2 {
				d.Runs2 = append(d.Runs2, WhiteRun{Start: overlapStart - d.Start, End: overlapEnd - d.Start})
			}
		}
		for _, r := range runs4 {
			overlapStart := max(r.Start, d.Start)
			overlapEnd := min(r.End, d.End)
			if overlapEnd-overlapStart >= minRunWidth4 {
				d.Runs4 = append(d.Runs4, WhiteRun{Start: overlapStart - d.Start, End: overlapEnd - d.Start})
			}
		}
	}

	// Side label, derived once so the debug image and the saved digit images
	// always agree on it.
	side := "left"
	if cols > 100 {
		side = "right"
	}

	// Debug image: scan lines plus the digit cuts (red for the leading "1").
	vis := gocv.NewMat()
	gocv.CvtColor(img, &vis, gocv.ColorGrayToBGR)
	gocv.Line(&vis, image.Point{0, y2}, image.Point{cols, y2}, color.RGBA{255, 255, 0, 255}, 1)
	gocv.Line(&vis, image.Point{0, y4}, image.Point{cols, y4}, color.RGBA{255, 0, 255, 255}, 1)
	for _, d := range digits {
		c := color.RGBA{0, 255, 0, 255}
		if d.IsOne {
			c = color.RGBA{255, 0, 0, 255}
		}
		gocv.Line(&vis, image.Point{d.Start, 0}, image.Point{d.Start, rows}, c, 1)
		gocv.Line(&vis, image.Point{d.End, 0}, image.Point{d.End, rows}, c, 1)
	}
	gocv.IMWrite("debug_cuts_"+side+".png", vis)
	vis.Close()

	// Classify each digit.
	var result string
	for i, d := range digits {
		var digit string
		if d.IsOne {
			digit = "1"
		} else {
			width := d.End - d.Start
			if width <= 0 {
				// Degenerate cut (boundaries met or crossed): there is no
				// column to fingerprint, and dividing by width would panic.
				fmt.Printf(" [%d-%d] empty digit region\n", d.Start, d.End)
				return ""
			}

			// Fingerprint: white coverage per scan line, rounded to 5%.
			white2 := 0
			for _, r := range d.Runs2 {
				white2 += r.Width()
			}
			white4 := 0
			for _, r := range d.Runs4 {
				white4 += r.Width()
			}
			p2 := ((white2 * 100 / width) + 2) / 5 * 5
			p4 := ((white4 * 100 / width) + 2) / 5 * 5
			fingerprint := fmt.Sprintf("%d_%d", p2, p4)

			// Check pattern store first.
			if patternStore != nil {
				if val, ok := patternStore.Lookup(fingerprint); ok {
					if val >= 0 && val <= 9 {
						digit = fmt.Sprintf("%d", val)
						fmt.Printf(" [%d-%d] fp=%s -> %s (learned)\n", d.Start, d.End, fingerprint, digit)
					} else {
						// Out-of-range value (e.g. -1) means ignore/unknown.
						digit = "?"
						fmt.Printf(" [%d-%d] fp=%s -> ? (marked ignore)\n", d.Start, d.End, fingerprint)
					}
				}
			}

			// Not in the store: save the digit for labeling and use a best guess.
			if digit == "" {
				digitImg := img.Region(image.Rect(d.Start, 0, d.End, rows))
				imgPath := fmt.Sprintf("digit_%s_%d_%s.png", side, i, fingerprint)
				gocv.IMWrite(imgPath, digitImg)
				// Region returns a Mat that must be released explicitly.
				digitImg.Close()
				AddUnlabeled(fingerprint, imgPath, side)
				digit = classifyByFingerprint(p2, p4)
				fmt.Printf(" [%d-%d] fp=%s -> %s (guessed, queued for labeling)\n", d.Start, d.End, fingerprint, digit)
			}
		}
		fmt.Printf(" [%d-%d] runs2=%d runs4=%d -> %s\n",
			d.Start, d.End, len(d.Runs2), len(d.Runs4), digit)
		if digit == "" || digit == "?" {
			return ""
		}
		result += digit
	}
	return result
}
// classifyByFingerprint guesses a digit from its scan-line fingerprint when
// the fingerprint is not in the learned pattern store.
//
// p2 and p4 are the white-coverage percentages at the upper (2/5) and lower
// (4/5) scan lines. The result is the digit of the reference point with the
// smallest Manhattan distance to (p2, p4); on a tie the entry listed first
// wins. "?" is returned only if every reference point is 1000 or more away.
//
// The reference values are approximate and will need calibration.
func classifyByFingerprint(p2, p4 int) string {
	type reference struct {
		upper, lower int // expected coverage at 2/5 and 4/5
		digit        string
	}
	// Order matters: earlier entries win ties.
	table := [...]reference{
		{45, 70, "0"}, // wide at both, slightly wider at bottom
		{20, 20, "1"}, // narrow at both
		{55, 75, "2"}, // medium top, wide bottom bar
		{55, 50, "3"}, // medium at both, right-heavy
		{55, 25, "4"}, // wide top (hole), narrow bottom (stem)
		{55, 45, "5"}, // medium top, medium-right bottom
		{30, 65, "6"}, // narrow top (stem), wide bottom (loop)
		{55, 25, "7"}, // wide top (bar), narrow bottom (stem); identical to "4", so "4" always wins
		{45, 45, "8"}, // medium at both (holes)
		{45, 50, "9"}, // medium top (loop), medium bottom (stem+bottom)
	}

	closest, closestDist := "?", 1000
	for i := range table {
		ref := &table[i]
		if d := abs(p2-ref.upper) + abs(p4-ref.lower); d < closestDist {
			closest, closestDist = ref.digit, d
		}
	}
	return closest
}
// classifyDigitByFeatures classifies a single digit image by its holes and,
// when there are none to go by, by where its lit (non-zero) pixels sit.
//
// Rules, applied in order:
//   - exactly 2 holes: "8"
//   - exactly 1 hole: above the vertical middle "9", below it "6"; exactly
//     at the middle, "0" if the hole covers more than 10% of the image area,
//     otherwise "4"
//   - any other hole count:
//     bounding box more than twice as tall as wide: "1";
//     lit pixels in the top fifth spanning more than half the width: "5" if
//     the bottom-left region holds more than 2% of the image area in lit
//     pixels, else "7";
//     lit pixels in the central band spanning more than 40% of the width with
//     more than 1% of the image area lit in its right third: "3";
//     otherwise "2".
//
// Returns "" for images smaller than 5 rows x 3 columns. Prints a one-line
// trace of the decision to stdout.
func classifyDigitByFeatures(img gocv.Mat) string {
	height, width := img.Rows(), img.Cols()
	if height < 5 || width < 3 {
		return ""
	}
	holes := findHoles(img)
	area := height * width
	halfY := height / 2

	// Debug output
	fmt.Printf(" [Digit %dx%d] holes=%d", width, height, len(holes))
	for idx, hole := range holes {
		fmt.Printf(" h%d(y=%d,area=%d)", idx, hole.CenterY, hole.Area)
	}

	// === HOLE-BASED CLASSIFICATION ===
	switch len(holes) {
	case 2:
		fmt.Println(" -> 8 (2 holes)")
		return "8"
	case 1:
		only := holes[0]
		bigHole := area * 10 / 100
		switch {
		case only.CenterY < halfY:
			fmt.Printf(" -> 9 (hole in top, centerY=%d < midY=%d)\n", only.CenterY, halfY)
			return "9"
		case only.CenterY > halfY:
			fmt.Printf(" -> 6 (hole in bottom, centerY=%d > midY=%d)\n", only.CenterY, halfY)
			return "6"
		case only.Area > bigHole:
			fmt.Printf(" -> 0 (large hole at middle, area=%d > %d)\n", only.Area, bigHole)
			return "0"
		default:
			fmt.Printf(" -> 4 (small hole at middle, area=%d <= %d)\n", only.Area, bigHole)
			return "4"
		}
	}
	fmt.Print(" no-hole:")

	// === NO-HOLE CLASSIFICATION ===
	lit := func(y, x int) bool { return img.GetUCharAt(y, x) > 0 }
	// spanX returns the leftmost and rightmost lit column within rows [y0, y1).
	// With no lit pixel it returns (width, 0).
	spanX := func(y0, y1 int) (lo, hi int) {
		lo, hi = width, 0
		for y := y0; y < y1; y++ {
			for x := 0; x < width; x++ {
				if !lit(y, x) {
					continue
				}
				if x < lo {
					lo = x
				}
				if x > hi {
					hi = x
				}
			}
		}
		return lo, hi
	}
	// countLit counts lit pixels in rows [y0, y1) and columns [x0, x1).
	countLit := func(y0, y1, x0, x1 int) int {
		n := 0
		for y := y0; y < y1; y++ {
			for x := x0; x < x1; x++ {
				if lit(y, x) {
					n++
				}
			}
		}
		return n
	}

	// Bounding box of all lit pixels.
	left, right := spanX(0, height)
	top, bottom := height, 0
	for y := 0; y < height; y++ {
		for x := 0; x < width; x++ {
			if !lit(y, x) {
				continue
			}
			if y < top {
				top = y
			}
			if y > bottom {
				bottom = y
			}
		}
	}
	glyphW := right - left + 1
	glyphH := bottom - top + 1

	// Narrow vertical shape (1) is checked FIRST, before the bar tests.
	if glyphW > 0 && glyphH > glyphW*2 {
		fmt.Printf(" narrow ratio h/w=%d/%d=%.1f -> 1\n", glyphH, glyphW, float64(glyphH)/float64(glyphW))
		return "1"
	}

	// Bar across the top (5 or 7).
	topLo, topHi := spanX(0, height/5)
	if topHi-topLo > width*50/100 {
		// 5 has content in the bottom-left region, 7 doesn't.
		bottomLeft := countLit(height*2/3, height, 0, width/2)
		if bottomLeft > area/50 {
			fmt.Printf(" topBar+bottomLeft=%d -> 5\n", bottomLeft)
			return "5"
		}
		fmt.Printf(" topBar, no bottomLeft=%d -> 7\n", bottomLeft)
		return "7"
	}

	// Horizontal content through the center with weight on the right (3).
	band := height / 6
	centerLo, centerHi := spanX(halfY-band, halfY+band)
	centerSpan := centerHi - centerLo
	rightCenter := countLit(halfY-band, halfY+band, width*2/3, width)
	if centerSpan > width*40/100 && rightCenter > area/100 {
		fmt.Printf(" centerSpan=%d, rightCenter=%d -> 3\n", centerSpan, rightCenter)
		return "3"
	}

	// Leftover → 2
	fmt.Printf(" width=%d, centerSpan=%d -> 2 (leftover)\n", glyphW, centerSpan)
	return "2"
}
// findHolePositions finds enclosed regions (holes) in img and returns the
// horizontal position of each one as a decile of the image width (0-9),
// sorted ascending. One entry is returned per hole, so values may repeat.
//
// The image is inverted, connected components are labeled, and components
// touching the image border are discarded as outer background. A hole's
// position is the center of its bounding box in X, scaled to 0-9. Unlike
// findHoles, no minimum hole size is applied.
//
// Returns nil when the image is smaller than 3x3 or no hole is found.
//
// The X extent of every component is gathered in a single pass over the
// label image rather than one full rescan per component.
func findHolePositions(img gocv.Mat) []int {
	rows, cols := img.Rows(), img.Cols()
	if rows < 3 || cols < 3 {
		return nil
	}

	// Invert image (holes become white).
	inverted := gocv.NewMat()
	defer inverted.Close()
	gocv.BitwiseNot(img, &inverted)

	// Find connected components; label 0 is the (inverted) background.
	labels := gocv.NewMat()
	defer labels.Close()
	numLabels := gocv.ConnectedComponents(inverted, &labels)
	if numLabels <= 1 {
		return nil
	}

	// Labels that touch the border are outer background, not holes.
	touchesBorder := make([]bool, numLabels)
	for x := 0; x < cols; x++ {
		touchesBorder[int(labels.GetIntAt(0, x))] = true
		touchesBorder[int(labels.GetIntAt(rows-1, x))] = true
	}
	for y := 0; y < rows; y++ {
		touchesBorder[int(labels.GetIntAt(y, 0))] = true
		touchesBorder[int(labels.GetIntAt(y, cols-1))] = true
	}

	// Leftmost and rightmost column of every candidate label, in one sweep.
	minX := make([]int, numLabels)
	maxX := make([]int, numLabels)
	for i := range minX {
		minX[i] = cols
	}
	for y := 0; y < rows; y++ {
		for x := 0; x < cols; x++ {
			label := int(labels.GetIntAt(y, x))
			if label == 0 || touchesBorder[label] {
				continue
			}
			if x < minX[label] {
				minX[label] = x
			}
			if x > maxX[label] {
				maxX[label] = x
			}
		}
	}

	// Tally holes per decile; emitting the tallies in order yields the
	// sorted result directly.
	var perDecile [10]int
	for label := 1; label < numLabels; label++ {
		if touchesBorder[label] {
			continue
		}
		centerX := (minX[label] + maxX[label]) / 2
		pct := centerX * 10 / cols
		if pct > 9 {
			pct = 9
		}
		perDecile[pct]++
	}
	var positions []int
	for decile, n := range perDecile {
		for ; n > 0; n-- {
			positions = append(positions, decile)
		}
	}
	return positions
}
// extractSkeletonPatterns builds the list of pattern strings for a number
// image using a spatial-division approach:
//  1. Crop to the digit bounds from getDigitBounds and copy the crop.
//  2. Find hole positions on the filled (not yet thinned) crop.
//  3. Thin the crop with zhangSuenThinning.
//  4. Look for a leading "1": enough skeleton pixels in the leftmost 12% of
//     the width and a mostly empty strip from 12% to 20%.
//  5. Split the remaining width in half and run classifyDigit on each half.
//  6. If there are 1 to 4 holes, append their positions as one "h..." entry
//     (e.g. "h27" means holes at 20% and 70% of the width).
//
// Entries appear in that order: optional "1", left digit, right digit,
// optional hole string. Empty classifyDigit results are skipped.
func extractSkeletonPatterns(img gocv.Mat) []string {
	// Crop to the digits and detach the crop from img.
	digitArea := gocv.NewMat()
	defer digitArea.Close()
	view := img.Region(getDigitBounds(img))
	view.CopyTo(&digitArea)
	view.Close()

	// Holes must be measured before thinning, on the filled shapes.
	holes := findHolePositions(digitArea)

	skeleton := zhangSuenThinning(digitArea)
	defer skeleton.Close()
	height, width := skeleton.Rows(), skeleton.Cols()

	// Scan the leftmost 20%: columns before leadEnd belong to a possible
	// leading "1", columns from leadEnd up to gapEnd to the gap after it.
	leadEnd := width * 12 / 100
	gapEnd := width * 20 / 100
	leadCount, gapCount := 0, 0
	leadMinX, leadMaxX := width, 0
	for y := 0; y < height; y++ {
		for x := 0; x < gapEnd; x++ {
			if skeleton.GetUCharAt(y, x) == 0 {
				continue
			}
			if x >= leadEnd {
				gapCount++
				continue
			}
			leadCount++
			if x < leadMinX {
				leadMinX = x
			}
			if x > leadMaxX {
				leadMaxX = x
			}
		}
	}

	var patterns []string
	firstX := 0 // where the two main digits start
	// A "1" needs vertical content, a mostly empty gap after it, and a
	// narrow X spread.
	if leadCount > height/2 && gapCount < height/4 && leadMaxX-leadMinX < leadEnd {
		patterns = append(patterns, "1")
		firstX = leadEnd
	}

	// Split what is left in half and classify the left, then the right part.
	splitX := firstX + (width-firstX)/2
	halves := [2]image.Rectangle{
		image.Rect(firstX, 0, splitX, height),
		image.Rect(splitX, 0, width, height),
	}
	for _, span := range halves {
		part := skeleton.Region(span)
		label := classifyDigit(part)
		part.Close()
		if label != "" {
			patterns = append(patterns, label)
		}
	}

	// Append hole positions as a single "h<digits>" entry.
	if n := len(holes); n > 0 && n <= 4 {
		suffix := "h"
		for _, p := range holes {
			suffix += strconv.Itoa(p)
		}
		patterns = append(patterns, suffix)
	}
	return patterns
}