2337 lines
61 KiB
Go
2337 lines
61 KiB
Go
package main
|
|
|
|
import (
|
|
"fmt"
|
|
"image"
|
|
"image/color"
|
|
"os"
|
|
"os/signal"
|
|
"strconv"
|
|
"strings"
|
|
"syscall"
|
|
"time"
|
|
|
|
"gocv.io/x/gocv"
|
|
"gopkg.in/yaml.v3"
|
|
)
|
|
|
|
// Config holds application configuration. The yaml tags name the keys each
// field is decoded from.
type Config struct {
	// Camera describes the video source.
	Camera struct {
		// RTSPURL is handed to NewCapture at startup.
		RTSPURL string `yaml:"rtsp_url"`
	} `yaml:"camera"`

	// HomeAssistant holds the endpoint and token for the Home Assistant
	// client. The integration is only enabled when both are non-empty.
	HomeAssistant struct {
		URL   string `yaml:"url"`
		Token string `yaml:"token"`
	} `yaml:"home_assistant"`

	// OCR holds the API key for an OCR service.
	// NOTE(review): not referenced by the code visible in this file section.
	OCR struct {
		APIKey string `yaml:"api_key"`
	} `yaml:"ocr"`

	// Processing holds frame-processing settings.
	Processing struct {
		// IntervalSeconds is given a default by loadConfig when unset.
		IntervalSeconds int `yaml:"interval_seconds"`
	} `yaml:"processing"`
}
|
|
|
|
// init suppresses GStreamer and OpenCV warnings by forcing their log-level
// environment variables at package initialization.
func init() {
	// Each entry is {variable name, value}.
	quiet := [][2]string{
		{"OPENCV_LOG_LEVEL", "ERROR"},
		{"GST_DEBUG", "0"},
	}
	for _, kv := range quiet {
		os.Setenv(kv[0], kv[1])
	}
}
|
|
|
|
func loadConfig(path string) (*Config, error) {
|
|
data, err := os.ReadFile(path)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var cfg Config
|
|
if err := yaml.Unmarshal(data, &cfg); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Defaults
|
|
if cfg.Processing.IntervalSeconds == 0 {
|
|
cfg.Processing.IntervalSeconds = 2
|
|
}
|
|
|
|
return &cfg, nil
|
|
}
|
|
|
|
func main() {
|
|
// Load config
|
|
cfg, err := loadConfig("config.yaml")
|
|
if err != nil {
|
|
fmt.Printf("Failed to load config: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
// Set up components
|
|
capture, err := NewCapture(cfg.Camera.RTSPURL)
|
|
if err != nil {
|
|
fmt.Printf("Failed to connect to camera: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
defer capture.Close()
|
|
|
|
// Load pattern store
|
|
patterns, err := NewPatternStore("patterns.csv")
|
|
if err != nil {
|
|
fmt.Printf("Failed to load patterns: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
fmt.Printf("Loaded %d patterns\n", patterns.Count())
|
|
|
|
// Initialize Home Assistant client
|
|
var hass *HASS
|
|
if cfg.HomeAssistant.URL != "" && cfg.HomeAssistant.Token != "" {
|
|
hass = NewHASS(cfg.HomeAssistant.URL, cfg.HomeAssistant.Token)
|
|
fmt.Println("Home Assistant integration enabled")
|
|
}
|
|
|
|
// Create training directory
|
|
os.MkdirAll("training", 0755)
|
|
|
|
// Start web server for training
|
|
StartWebServer(8090)
|
|
|
|
// Handle graceful shutdown
|
|
sigChan := make(chan os.Signal, 1)
|
|
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
|
|
|
|
fmt.Println("Starting pulse monitor...")
|
|
|
|
// Find bar region (retry until valid)
|
|
var barRegion Region
|
|
for {
|
|
frame := capture.NextFrame()
|
|
if frame.Empty() {
|
|
frame.Close()
|
|
time.Sleep(time.Second)
|
|
continue
|
|
}
|
|
prep := preprocessFrame(frame)
|
|
frame.Close()
|
|
gocv.IMWrite("bw_image.png", prep)
|
|
barRegion = findBar(prep)
|
|
prep.Close()
|
|
fmt.Println("Saved bw_image.png and bars_visualization.png")
|
|
fmt.Printf("Bar region: (%d,%d) %dx%d\n", barRegion.X, barRegion.Y, barRegion.W, barRegion.H)
|
|
if barRegion.W > 0 && barRegion.H > 0 {
|
|
break
|
|
}
|
|
fmt.Println("Invalid bar region, retrying in 2s...")
|
|
time.Sleep(2 * time.Second)
|
|
}
|
|
|
|
// Track last known values (for display and HASS)
|
|
var lastLeft, lastRight string
|
|
var lastPostedLeft, lastPostedRight int
|
|
var prevDisplayLeft, prevDisplayRight string // for CR/LF logic
|
|
|
|
// Rate limit: 2 frames per second
|
|
lastProcess := time.Now()
|
|
processInterval := 500 * time.Millisecond
|
|
|
|
// Main loop - detect changes in left/right
|
|
for {
|
|
select {
|
|
case <-sigChan:
|
|
fmt.Printf("\nShutting down... Patterns: %d\n", patterns.Count())
|
|
GenerateHTML("training")
|
|
return
|
|
default:
|
|
}
|
|
|
|
frame := capture.NextFrame()
|
|
if frame.Empty() {
|
|
frame.Close()
|
|
time.Sleep(100 * time.Millisecond)
|
|
continue
|
|
}
|
|
|
|
// Skip processing if not enough time has passed
|
|
if time.Since(lastProcess) < processInterval {
|
|
frame.Close()
|
|
continue
|
|
}
|
|
lastProcess = time.Now()
|
|
|
|
prep := preprocessFrame(frame)
|
|
frame.Close()
|
|
|
|
// Crop bar and find digit regions
|
|
barCrop := CropRegion(prep, barRegion)
|
|
rows := barCrop.Rows()
|
|
|
|
// Find digit boundaries
|
|
spo2Start, spo2End, hrStart, hrEnd := visualizeColumns(barCrop)
|
|
|
|
// Crop tightly to each digit region
|
|
leftRegion := barCrop.Region(image.Rect(spo2Start, 0, spo2End, rows))
|
|
leftCrop := gocv.NewMat()
|
|
leftRegion.CopyTo(&leftCrop)
|
|
leftRegion.Close()
|
|
|
|
rightRegion := barCrop.Region(image.Rect(hrStart, 0, hrEnd, rows))
|
|
rightCrop := gocv.NewMat()
|
|
rightRegion.CopyTo(&rightCrop)
|
|
rightRegion.Close()
|
|
|
|
leftDigitCrop := leftCrop
|
|
rightDigitCrop := rightCrop
|
|
|
|
// Add 1px black border so contours don't touch edges (FindContours ignores border-touching shapes)
|
|
leftPadded := gocv.NewMatWithSize(leftCrop.Rows()+2, leftCrop.Cols()+2, leftCrop.Type())
|
|
rightPadded := gocv.NewMatWithSize(rightCrop.Rows()+2, rightCrop.Cols()+2, rightCrop.Type())
|
|
leftRegionPad := leftPadded.Region(image.Rect(1, 1, leftCrop.Cols()+1, leftCrop.Rows()+1))
|
|
rightRegionPad := rightPadded.Region(image.Rect(1, 1, rightCrop.Cols()+1, rightCrop.Rows()+1))
|
|
leftCrop.CopyTo(&leftRegionPad)
|
|
rightCrop.CopyTo(&rightRegionPad)
|
|
leftRegionPad.Close()
|
|
rightRegionPad.Close()
|
|
|
|
// Vectorize contours for shape comparison (RetrievalList = all contours including holes)
|
|
leftContours := gocv.FindContours(leftPadded, gocv.RetrievalList, gocv.ChainApproxSimple)
|
|
rightContours := gocv.FindContours(rightPadded, gocv.RetrievalList, gocv.ChainApproxSimple)
|
|
leftPadded.Close()
|
|
rightPadded.Close()
|
|
|
|
// Recognize numbers using hole/stroke analysis
|
|
leftResult := recognizeNumber(leftDigitCrop)
|
|
rightResult := recognizeNumber(rightDigitCrop)
|
|
|
|
// Update values if recognized
|
|
if leftResult != "" {
|
|
lastLeft = leftResult
|
|
if hass != nil {
|
|
if val, err := strconv.Atoi(leftResult); err == nil && val != lastPostedLeft {
|
|
hass.PostSpO2(val)
|
|
lastPostedLeft = val
|
|
}
|
|
}
|
|
}
|
|
|
|
if rightResult != "" {
|
|
lastRight = rightResult
|
|
if hass != nil {
|
|
if val, err := strconv.Atoi(rightResult); err == nil && val != lastPostedRight {
|
|
hass.PostHR(val)
|
|
lastPostedRight = val
|
|
}
|
|
}
|
|
}
|
|
if lastLeft != "" || lastRight != "" {
|
|
timeStr := time.Now().Format("15:04:05")
|
|
valuesChanged := lastLeft != prevDisplayLeft || lastRight != prevDisplayRight
|
|
if valuesChanged {
|
|
fmt.Printf("\n%s SpO2: %-3s HR: %-3s", timeStr, lastLeft, lastRight)
|
|
prevDisplayLeft, prevDisplayRight = lastLeft, lastRight
|
|
} else {
|
|
fmt.Printf("\r%s SpO2: %-3s HR: %-3s", timeStr, lastLeft, lastRight)
|
|
}
|
|
}
|
|
|
|
// Cleanup
|
|
leftContours.Close()
|
|
rightContours.Close()
|
|
leftCrop.Close()
|
|
rightCrop.Close()
|
|
barCrop.Close()
|
|
prep.Close()
|
|
}
|
|
}
|
|
|
|
// preprocessFrame crops timestamp and rotates the frame
|
|
func preprocessFrame(frame gocv.Mat) gocv.Mat {
|
|
// Crop timestamp (top 68 pixels)
|
|
cropped := frame.Region(image.Rect(0, 68, frame.Cols(), frame.Rows()))
|
|
rotated := gocv.NewMat()
|
|
gocv.Rotate(cropped, &rotated, gocv.Rotate90Clockwise)
|
|
cropped.Close()
|
|
|
|
// Convert to grayscale then threshold to B&W
|
|
gray := gocv.NewMat()
|
|
gocv.CvtColor(rotated, &gray, gocv.ColorBGRToGray)
|
|
rotated.Close()
|
|
|
|
bw := gocv.NewMat()
|
|
gocv.Threshold(gray, &bw, 210, 255, gocv.ThresholdBinary)
|
|
gray.Close()
|
|
return bw
|
|
}
|
|
|
|
// parseNumber builds an integer from every ASCII digit in s, in order, and
// ignores all other characters ("a1b2" yields 12). It returns 0 when s
// contains no digits.
func parseNumber(s string) int {
	value := 0
	// Byte-wise scan: bytes of multi-byte UTF-8 sequences are never in '0'..'9'.
	for i := 0; i < len(s); i++ {
		if d := s[i]; '0' <= d && d <= '9' {
			value = value*10 + int(d-'0')
		}
	}
	return value
}
|
|
|
|
// abs returns the absolute value of x.
func abs(x int) int {
	if x >= 0 {
		return x
	}
	return -x
}
|
|
|
|
// findBar detects the digit bar region using B&W image analysis
|
|
func findBar(frame gocv.Mat) Region {
|
|
// Input is B&W (0 or 255) from preprocessFrame
|
|
rows := frame.Rows()
|
|
cols := frame.Cols()
|
|
|
|
// Count white pixels per row
|
|
whiteCount := make([]int, rows)
|
|
for y := 0; y < rows; y++ {
|
|
for x := 0; x < cols; x++ {
|
|
if frame.GetUCharAt(y, x) > 0 {
|
|
whiteCount[y]++
|
|
}
|
|
}
|
|
}
|
|
|
|
// Find ALL bars - transitions between rows with/without white pixels
|
|
type Bar struct {
|
|
Start, End int
|
|
HasWhite bool
|
|
WhitePixels int
|
|
}
|
|
|
|
var bars []Bar
|
|
var transitions []int
|
|
hasWhite := whiteCount[0] > 0
|
|
barStart := 0
|
|
var pixelSum int
|
|
|
|
transitions = append(transitions, 0)
|
|
|
|
for y := 0; y < rows; y++ {
|
|
rowHasWhite := whiteCount[y] > 0
|
|
pixelSum += whiteCount[y]
|
|
|
|
if rowHasWhite != hasWhite || y == rows-1 {
|
|
height := y - barStart
|
|
if height >= 5 {
|
|
bars = append(bars, Bar{
|
|
Start: barStart,
|
|
End: y - 1,
|
|
HasWhite: hasWhite,
|
|
WhitePixels: pixelSum,
|
|
})
|
|
transitions = append(transitions, y)
|
|
}
|
|
barStart = y
|
|
pixelSum = whiteCount[y]
|
|
hasWhite = rowHasWhite
|
|
}
|
|
}
|
|
|
|
// Save visualization
|
|
vis := gocv.NewMat()
|
|
gocv.CvtColor(frame, &vis, gocv.ColorGrayToBGR)
|
|
|
|
for _, y := range transitions {
|
|
gocv.Line(&vis, image.Point{0, y}, image.Point{cols, y}, color.RGBA{255, 0, 0, 255}, 2)
|
|
gocv.PutText(&vis, fmt.Sprintf("%d", y), image.Point{10, y - 5}, gocv.FontHersheyPlain, 1.5, color.RGBA{0, 255, 0, 255}, 2)
|
|
}
|
|
gocv.IMWrite("bars_visualization.png", vis)
|
|
vis.Close()
|
|
|
|
// Find first and last white bar to define content bounds
|
|
firstWhite, lastWhite := -1, -1
|
|
for i, b := range bars {
|
|
if b.HasWhite {
|
|
if firstWhite == -1 {
|
|
firstWhite = i
|
|
}
|
|
lastWhite = i
|
|
}
|
|
}
|
|
|
|
// Calculate vertical span of white content
|
|
var contentStart, contentEnd int
|
|
if firstWhite >= 0 && lastWhite >= 0 {
|
|
contentStart = bars[firstWhite].Start
|
|
contentEnd = bars[lastWhite].End
|
|
}
|
|
contentSpan := contentEnd - contentStart
|
|
|
|
// Print bars with position as percentage of white content span
|
|
fmt.Println("Detected bars (% = position within white content):")
|
|
for _, b := range bars {
|
|
t := "BLACK"
|
|
if b.HasWhite {
|
|
t = "WHITE"
|
|
}
|
|
height := b.End - b.Start + 1
|
|
|
|
// Calculate position as percentage of content span
|
|
startPct := float64(b.Start-contentStart) / float64(contentSpan) * 100
|
|
endPct := float64(b.End-contentStart) / float64(contentSpan) * 100
|
|
|
|
fmt.Printf(" Y=%d-%d (%d px) %s %.0f%%-%.0f%%\n", b.Start, b.End, height, t, startPct, endPct)
|
|
}
|
|
|
|
// Find the digit bar by position within content span
|
|
// It sits around 40-60% and has a significant black gap above it (>5%)
|
|
var digitBar Bar
|
|
found := false
|
|
for i, bar := range bars {
|
|
if !bar.HasWhite {
|
|
continue
|
|
}
|
|
|
|
// Calculate position as percentage of content span
|
|
startPct := float64(bar.Start-contentStart) / float64(contentSpan) * 100
|
|
endPct := float64(bar.End-contentStart) / float64(contentSpan) * 100
|
|
|
|
// Check if bar is in the 30%-70% range (middle of content)
|
|
if startPct < 30 || startPct > 70 {
|
|
continue
|
|
}
|
|
|
|
// Check black gap above (previous bar should be black with significant height)
|
|
if i > 0 {
|
|
prevBar := bars[i-1]
|
|
if !prevBar.HasWhite {
|
|
gapPct := float64(prevBar.End-prevBar.Start+1) / float64(contentSpan) * 100
|
|
// Need at least 5% black gap above
|
|
if gapPct >= 5 {
|
|
digitBar = bar
|
|
found = true
|
|
fmt.Printf("Selected digit bar: %.0f%%-%.0f%% (gap above: %.1f%%)\n", startPct, endPct, gapPct)
|
|
break
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if !found {
|
|
return Region{}
|
|
}
|
|
|
|
// Find leftmost and rightmost white pixels in this bar
|
|
minX := cols
|
|
maxX := 0
|
|
for y := digitBar.Start; y <= digitBar.End; y++ {
|
|
for x := 0; x < cols; x++ {
|
|
if frame.GetUCharAt(y, x) > 0 {
|
|
if x < minX {
|
|
minX = x
|
|
}
|
|
if x > maxX {
|
|
maxX = x
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Save cropped bar
|
|
region := Region{
|
|
X: minX,
|
|
Y: digitBar.Start,
|
|
W: maxX - minX + 1,
|
|
H: digitBar.End - digitBar.Start + 1,
|
|
}
|
|
barCrop := CropRegion(frame, region)
|
|
gocv.IMWrite("bar_cropped.png", barCrop)
|
|
barCrop.Close()
|
|
fmt.Printf("Bar cropped: (%d,%d) %dx%d\n", region.X, region.Y, region.W, region.H)
|
|
|
|
return region
|
|
}
|
|
|
|
// parseValues extracts SpO2 and HR from an API response like "SPO2:91 HR:117".
//
// Every maximal run of ASCII digits in s is treated as a candidate number,
// taken left to right; labels are ignored. The first candidate in 70-100
// becomes spo2; otherwise the first candidate in 40-220 becomes hr. Once spo2
// is set, a later 70-100 value can still fill hr. A value that is not found
// is returned as 0.
func parseValues(s string) (spo2, hr int) {
	digitRuns := strings.FieldsFunc(s, func(r rune) bool {
		return r < '0' || r > '9'
	})

	for _, run := range digitRuns {
		n, err := strconv.Atoi(run)
		if err != nil {
			// Digit run too long for an int; not a plausible reading.
			continue
		}
		switch {
		case spo2 == 0 && n >= 70 && n <= 100:
			spo2 = n
		case hr == 0 && n >= 40 && n <= 220:
			hr = n
		}
	}
	return spo2, hr
}
|
|
|
|
// Region defines a rectangular area by its top-left corner and size, in
// pixels.
type Region struct {
	X int // left edge
	Y int // top edge
	W int // width
	H int // height
}
|
|
|
|
// ColumnType classifies a column's content.
type ColumnType int

const (
	// ColEmpty marks a column with no white pixels.
	ColEmpty ColumnType = iota
	// ColFullHeight marks a column whose white pixels span most of the
	// height (digits).
	ColFullHeight
	// ColBottomOnly marks a column with white pixels only in the bottom
	// portion (labels).
	ColBottomOnly
)
|
|
|
|
// classifyColumn determines if a column is empty, full-height (digit), or bottom-only (label)
|
|
func classifyColumn(img gocv.Mat, x int) ColumnType {
|
|
rows := img.Rows()
|
|
topThird := rows / 3
|
|
|
|
topPixels := 0
|
|
bottomPixels := 0
|
|
|
|
for y := 0; y < rows; y++ {
|
|
if img.GetUCharAt(y, x) > 0 {
|
|
if y < topThird {
|
|
topPixels++
|
|
} else {
|
|
bottomPixels++
|
|
}
|
|
}
|
|
}
|
|
|
|
totalPixels := topPixels + bottomPixels
|
|
if totalPixels < 2 {
|
|
return ColEmpty
|
|
}
|
|
|
|
// If has ANY content in top third, it's a full-height digit
|
|
if topPixels >= 1 {
|
|
return ColFullHeight
|
|
}
|
|
|
|
// Content only in bottom = small label text
|
|
return ColBottomOnly
|
|
}
|
|
|
|
// visualizeColumns draws column classification and finds digit regions
|
|
// Returns: spo2Start, spo2End, hrStart, hrEnd (x coordinates)
|
|
func visualizeColumns(img gocv.Mat) (int, int, int, int) {
|
|
rows, cols := img.Rows(), img.Cols()
|
|
|
|
// Classify all columns
|
|
colTypes := make([]ColumnType, cols)
|
|
for x := 0; x < cols; x++ {
|
|
colTypes[x] = classifyColumn(img, x)
|
|
}
|
|
|
|
// Find digit regions: contiguous runs of full-height columns
|
|
// with gaps of at least minGap EMPTY columns between them
|
|
// Labels (BOTTOM columns) don't count as gaps - they're adjacent to digits
|
|
// But we crop to the FULL columns only (excluding labels)
|
|
minGap := 30
|
|
type DigitRun struct{ Start, End int }
|
|
var runs []DigitRun
|
|
|
|
inRun := false
|
|
runStart := 0
|
|
lastFull := 0 // track last full-height column for tight cropping
|
|
emptyCount := 0
|
|
|
|
for x := 0; x < cols; x++ {
|
|
isDigit := colTypes[x] == ColFullHeight
|
|
isEmpty := colTypes[x] == ColEmpty
|
|
|
|
if isDigit {
|
|
if !inRun {
|
|
inRun = true
|
|
runStart = x
|
|
}
|
|
lastFull = x
|
|
emptyCount = 0
|
|
} else if inRun {
|
|
if isEmpty {
|
|
emptyCount++
|
|
}
|
|
// Only end run if we've seen enough truly empty columns
|
|
if emptyCount >= minGap {
|
|
// End current run at last FULL column (tight crop, excluding labels)
|
|
runs = append(runs, DigitRun{runStart, lastFull + 1})
|
|
inRun = false
|
|
}
|
|
}
|
|
}
|
|
if inRun {
|
|
runs = append(runs, DigitRun{runStart, lastFull + 1})
|
|
}
|
|
|
|
// Create visualization
|
|
vis := gocv.NewMat()
|
|
gocv.CvtColor(img, &vis, gocv.ColorGrayToBGR)
|
|
defer vis.Close()
|
|
|
|
// Draw column type markers
|
|
for x := 0; x < cols; x++ {
|
|
var c color.RGBA
|
|
switch colTypes[x] {
|
|
case ColEmpty:
|
|
c = color.RGBA{0, 0, 255, 255} // red
|
|
case ColFullHeight:
|
|
c = color.RGBA{0, 255, 0, 255} // green
|
|
case ColBottomOnly:
|
|
c = color.RGBA{255, 0, 0, 255} // blue
|
|
}
|
|
gocv.Line(&vis, image.Point{x, 0}, image.Point{x, 3}, c, 1)
|
|
}
|
|
|
|
// Draw vertical lines at run boundaries (white)
|
|
for _, run := range runs {
|
|
gocv.Line(&vis, image.Point{run.Start, 0}, image.Point{run.Start, rows}, color.RGBA{255, 255, 255, 255}, 1)
|
|
gocv.Line(&vis, image.Point{run.End, 0}, image.Point{run.End, rows}, color.RGBA{255, 255, 255, 255}, 1)
|
|
}
|
|
|
|
// Draw mid-line (cyan) - shows upper/lower half boundary for digit pattern analysis
|
|
gocv.Line(&vis, image.Point{0, rows/2}, image.Point{cols, rows/2}, color.RGBA{0, 255, 255, 255}, 1)
|
|
|
|
// Save debug image once, but always print first time
|
|
gocv.IMWrite("debug_columns.png", vis)
|
|
|
|
// Return early if we've printed before (use static-like behavior via file mod time)
|
|
fmt.Printf("Digit runs (minGap=%d): ", minGap)
|
|
for i, run := range runs {
|
|
fmt.Printf("Run%d[%d-%d] ", i, run.Start, run.End)
|
|
}
|
|
fmt.Println()
|
|
|
|
// Return SpO2 and HR boundaries
|
|
if len(runs) >= 2 {
|
|
return runs[0].Start, runs[0].End, runs[1].Start, runs[1].End
|
|
} else if len(runs) == 1 {
|
|
mid := (runs[0].Start + runs[0].End) / 2
|
|
return runs[0].Start, mid, mid, runs[0].End
|
|
}
|
|
return 0, cols/2, cols/2, cols
|
|
}
|
|
|
|
// findDigitRegions analyzes bar image and returns SpO2 and HR regions
|
|
// Uses column classification: empty, full-height (digits), bottom-only (labels)
|
|
func findDigitRegions(img gocv.Mat) (spo2 image.Rectangle, hr image.Rectangle) {
|
|
rows, cols := img.Rows(), img.Cols()
|
|
|
|
// Classify each column
|
|
colTypes := make([]ColumnType, cols)
|
|
for x := 0; x < cols; x++ {
|
|
colTypes[x] = classifyColumn(img, x)
|
|
}
|
|
|
|
// Find contiguous regions of full-height columns (digit regions)
|
|
type Region struct {
|
|
Start, End int
|
|
}
|
|
var digitRegions []Region
|
|
|
|
inRegion := false
|
|
regionStart := 0
|
|
for x := 0; x < cols; x++ {
|
|
if colTypes[x] == ColFullHeight {
|
|
if !inRegion {
|
|
inRegion = true
|
|
regionStart = x
|
|
}
|
|
} else {
|
|
if inRegion {
|
|
// End of region - must be at least 10% of width to count
|
|
if x - regionStart > cols/10 {
|
|
digitRegions = append(digitRegions, Region{regionStart, x})
|
|
}
|
|
inRegion = false
|
|
}
|
|
}
|
|
}
|
|
// Don't forget last region
|
|
if inRegion && cols - regionStart > cols/10 {
|
|
digitRegions = append(digitRegions, Region{regionStart, cols})
|
|
}
|
|
|
|
// Debug output
|
|
fmt.Printf("Found %d digit regions: ", len(digitRegions))
|
|
for _, r := range digitRegions {
|
|
pctStart := float64(r.Start) * 100 / float64(cols)
|
|
pctEnd := float64(r.End) * 100 / float64(cols)
|
|
fmt.Printf("[%d-%d (%.0f%%-%.0f%%)] ", r.Start, r.End, pctStart, pctEnd)
|
|
}
|
|
fmt.Println()
|
|
|
|
// We expect 2 digit regions: SpO2 (left) and HR (right)
|
|
if len(digitRegions) >= 2 {
|
|
// First region = SpO2, Second region = HR
|
|
spo2 = image.Rect(digitRegions[0].Start, 0, digitRegions[0].End, rows)
|
|
hr = image.Rect(digitRegions[1].Start, 0, digitRegions[1].End, rows)
|
|
} else if len(digitRegions) == 1 {
|
|
// Only one region found - split it in half
|
|
mid := (digitRegions[0].Start + digitRegions[0].End) / 2
|
|
spo2 = image.Rect(digitRegions[0].Start, 0, mid, rows)
|
|
hr = image.Rect(mid, 0, digitRegions[0].End, rows)
|
|
} else {
|
|
// Fallback - split whole image
|
|
spo2 = image.Rect(0, 0, cols/2, rows)
|
|
hr = image.Rect(cols/2, 0, cols, rows)
|
|
}
|
|
|
|
return spo2, hr
|
|
}
|
|
|
|
// CropRegion extracts a region from a Mat
|
|
func CropRegion(mat gocv.Mat, r Region) gocv.Mat {
|
|
rect := image.Rect(r.X, r.Y, r.X+r.W, r.Y+r.H)
|
|
region := mat.Region(rect)
|
|
cropped := gocv.NewMat()
|
|
region.CopyTo(&cropped)
|
|
region.Close()
|
|
return cropped
|
|
}
|
|
|
|
// DigitPattern holds a contour's direction pattern and position.
type DigitPattern struct {
	// X is the contour's leftmost x coordinate, used for left-to-right sorting.
	X int
	// Pattern is the contour's direction chain.
	Pattern string
}
|
|
|
|
// extractDigitPatterns extracts digit patterns from contours, filtering small ones and sorting left-to-right
|
|
// Also tracks hole positions (as percentage of width) and appends to the pattern
|
|
// filterRightEdge: if true, ignore contours starting past 80% of width (for right side noise)
|
|
func extractDigitPatterns(contours gocv.PointsVector, imgHeight, imgWidth int, filterRightEdge bool) []string {
|
|
var patterns []DigitPattern
|
|
var holes []image.Rectangle // bounding boxes of potential holes
|
|
var digitBounds image.Rectangle // combined bounding box of all digits
|
|
|
|
// Size thresholds
|
|
minDigitHeight := imgHeight * 25 / 100 // outer digit: at least 25% height
|
|
minDigitWidth := imgWidth * 15 / 100 // outer digit: at least 15% width
|
|
minHoleSize := imgHeight * 10 / 100 // hole: at least 10% in either dimension
|
|
|
|
// First pass: find digits and potential holes
|
|
for i := 0; i < contours.Size(); i++ {
|
|
contour := contours.At(i)
|
|
rect := gocv.BoundingRect(contour)
|
|
|
|
isDigit := rect.Dy() >= minDigitHeight && rect.Dx() >= minDigitWidth
|
|
if filterRightEdge && rect.Min.X >= imgWidth*80/100 {
|
|
isDigit = false
|
|
}
|
|
isHole := rect.Dy() >= minHoleSize && rect.Dx() >= minHoleSize && rect.Min.Y < imgHeight*60/100
|
|
|
|
if isHole && !isDigit {
|
|
holes = append(holes, rect)
|
|
continue
|
|
}
|
|
|
|
if !isDigit {
|
|
continue
|
|
}
|
|
|
|
// Filter contours that start too low (labels are at bottom)
|
|
if rect.Min.Y > imgHeight*60/100 {
|
|
continue
|
|
}
|
|
|
|
// Expand digit bounds
|
|
if digitBounds.Empty() {
|
|
digitBounds = rect
|
|
} else {
|
|
digitBounds = digitBounds.Union(rect)
|
|
}
|
|
|
|
// Simplify and get direction pattern
|
|
epsilon := 0.01 * gocv.ArcLength(contour, true)
|
|
approx := gocv.ApproxPolyDP(contour, epsilon, true)
|
|
pts := approx.ToPoints()
|
|
|
|
if len(pts) > 2 {
|
|
dirs := pointsToDirections(pts)
|
|
pattern := strings.Join(dirs, "")
|
|
if len(pattern) > 0 {
|
|
patterns = append(patterns, DigitPattern{X: rect.Min.X, Pattern: pattern})
|
|
}
|
|
}
|
|
}
|
|
|
|
// Sort by X position (left to right)
|
|
for i := 0; i < len(patterns)-1; i++ {
|
|
for j := i + 1; j < len(patterns); j++ {
|
|
if patterns[j].X < patterns[i].X {
|
|
patterns[i], patterns[j] = patterns[j], patterns[i]
|
|
}
|
|
}
|
|
}
|
|
|
|
// Return patterns in order, with hole positions appended
|
|
result := make([]string, len(patterns))
|
|
for i, p := range patterns {
|
|
result[i] = p.Pattern
|
|
}
|
|
|
|
// Filter holes: only keep those inside the digit bounding box
|
|
var holePositions []int
|
|
for _, hole := range holes {
|
|
// Check if hole center is inside digit bounds
|
|
centerX := hole.Min.X + hole.Dx()/2
|
|
centerY := hole.Min.Y + hole.Dy()/2
|
|
if centerX >= digitBounds.Min.X && centerX <= digitBounds.Max.X &&
|
|
centerY >= digitBounds.Min.Y && centerY <= digitBounds.Max.Y {
|
|
// Record X position as percentage of digit width (0-9)
|
|
relX := centerX - digitBounds.Min.X
|
|
pct := relX * 10 / digitBounds.Dx()
|
|
if pct > 9 {
|
|
pct = 9
|
|
}
|
|
holePositions = append(holePositions, pct)
|
|
}
|
|
}
|
|
|
|
// Sort and append hole positions (e.g., "h35" means holes at 30%, 50% of digit width)
|
|
// Skip if >2 holes (likely alarm bell or other icon, not a digit)
|
|
if len(holePositions) > 2 {
|
|
return nil
|
|
}
|
|
if len(holePositions) > 0 {
|
|
for i := 0; i < len(holePositions)-1; i++ {
|
|
for j := i + 1; j < len(holePositions); j++ {
|
|
if holePositions[j] < holePositions[i] {
|
|
holePositions[i], holePositions[j] = holePositions[j], holePositions[i]
|
|
}
|
|
}
|
|
}
|
|
h := "h"
|
|
for _, p := range holePositions {
|
|
h += fmt.Sprintf("%d", p)
|
|
}
|
|
result = append(result, h)
|
|
}
|
|
return result
|
|
}
|
|
|
|
// pointsToDirections converts contour points to compass directions (normalized - no consecutive duplicates)
|
|
// Skips segments shorter than minDist pixels
|
|
func pointsToDirections(pts []image.Point) []string {
|
|
const minDist = 10 // minimum distance between points
|
|
dirs := make([]string, 0, len(pts))
|
|
var lastDir string
|
|
for i := 0; i < len(pts); i++ {
|
|
p1 := pts[i]
|
|
p2 := pts[(i+1)%len(pts)] // wrap to first point
|
|
|
|
dx := p2.X - p1.X
|
|
dy := p2.Y - p1.Y
|
|
|
|
// Skip segments that are too short
|
|
if abs(dx) < minDist && abs(dy) < minDist {
|
|
continue
|
|
}
|
|
|
|
dir := getDirection(dx, dy)
|
|
// Normalize: skip consecutive duplicates
|
|
if dir != lastDir {
|
|
dirs = append(dirs, dir)
|
|
lastDir = dir
|
|
}
|
|
}
|
|
return dirs
|
|
}
|
|
|
|
// getDirection returns the cardinal direction of the vector (dx, dy) as a
// digit string: "1"=N, "2"=E, "3"=S, "4"=W. A positive dy counts as S.
// The axis with the larger magnitude wins, and ties go to the horizontal
// axis. The zero vector yields "".
func getDirection(dx, dy int) string {
	// Magnitudes of both components.
	magX, magY := dx, dy
	if magX < 0 {
		magX = -magX
	}
	if magY < 0 {
		magY = -magY
	}

	switch {
	case dx == 0 && dy == 0:
		return ""
	case magX >= magY && dx > 0:
		return "2" // E
	case magX >= magY:
		return "4" // W
	case dy > 0:
		return "3" // S
	default:
		return "1" // N
	}
}
|
|
|
|
/* OLD 8-direction version (keep for easy revert):
|
|
// getDirection returns direction as digit 1-8 (clockwise from N)
|
|
// 1=N, 2=NE, 3=E, 4=SE, 5=S, 6=SW, 7=W, 8=NW
|
|
func getDirection8(dx, dy int) string {
|
|
if dx == 0 && dy == 0 {
|
|
return ""
|
|
}
|
|
|
|
adx, ady := abs(dx), abs(dy)
|
|
|
|
// Cardinal directions (one axis dominates >2x)
|
|
if adx > ady*2 {
|
|
if dx > 0 {
|
|
return "3" // E
|
|
}
|
|
return "7" // W
|
|
}
|
|
if ady > adx*2 {
|
|
if dy > 0 {
|
|
return "5" // S
|
|
}
|
|
return "1" // N
|
|
}
|
|
|
|
// Diagonal
|
|
if dx > 0 && dy > 0 {
|
|
return "4" // SE
|
|
}
|
|
if dx > 0 && dy < 0 {
|
|
return "2" // NE
|
|
}
|
|
if dx < 0 && dy > 0 {
|
|
return "6" // SW
|
|
}
|
|
return "8" // NW
|
|
}
|
|
*/
|
|
|
|
// zhangSuenThinning performs skeletonization using the Zhang-Suen algorithm
|
|
// Input: binary image (0 = background, 255 = foreground)
|
|
// Returns: thinned image (1-pixel wide skeleton)
|
|
func zhangSuenThinning(img gocv.Mat) gocv.Mat {
|
|
rows, cols := img.Rows(), img.Cols()
|
|
|
|
// Convert to binary array for easier manipulation
|
|
data := make([][]byte, rows)
|
|
for y := 0; y < rows; y++ {
|
|
data[y] = make([]byte, cols)
|
|
for x := 0; x < cols; x++ {
|
|
if img.GetUCharAt(y, x) > 127 {
|
|
data[y][x] = 1
|
|
}
|
|
}
|
|
}
|
|
|
|
// Neighbor offsets (P2,P3,P4,P5,P6,P7,P8,P9 clockwise from top)
|
|
// P9 P2 P3
|
|
// P8 P1 P4
|
|
// P7 P6 P5
|
|
dy := []int{-1, -1, 0, 1, 1, 1, 0, -1}
|
|
dx := []int{0, 1, 1, 1, 0, -1, -1, -1}
|
|
|
|
getP := func(y, x, i int) byte {
|
|
ny, nx := y+dy[i], x+dx[i]
|
|
if ny < 0 || ny >= rows || nx < 0 || nx >= cols {
|
|
return 0
|
|
}
|
|
return data[ny][nx]
|
|
}
|
|
|
|
// Count 0->1 transitions in clockwise order
|
|
transitions := func(y, x int) int {
|
|
count := 0
|
|
for i := 0; i < 8; i++ {
|
|
if getP(y, x, i) == 0 && getP(y, x, (i+1)%8) == 1 {
|
|
count++
|
|
}
|
|
}
|
|
return count
|
|
}
|
|
|
|
// Count non-zero neighbors
|
|
neighbors := func(y, x int) int {
|
|
count := 0
|
|
for i := 0; i < 8; i++ {
|
|
count += int(getP(y, x, i))
|
|
}
|
|
return count
|
|
}
|
|
|
|
changed := true
|
|
for changed {
|
|
changed = false
|
|
|
|
// Sub-iteration 1
|
|
var toRemove []image.Point
|
|
for y := 1; y < rows-1; y++ {
|
|
for x := 1; x < cols-1; x++ {
|
|
if data[y][x] != 1 {
|
|
continue
|
|
}
|
|
n := neighbors(y, x)
|
|
if n < 2 || n > 6 {
|
|
continue
|
|
}
|
|
if transitions(y, x) != 1 {
|
|
continue
|
|
}
|
|
// P2 * P4 * P6 == 0
|
|
if getP(y, x, 0)*getP(y, x, 2)*getP(y, x, 4) != 0 {
|
|
continue
|
|
}
|
|
// P4 * P6 * P8 == 0
|
|
if getP(y, x, 2)*getP(y, x, 4)*getP(y, x, 6) != 0 {
|
|
continue
|
|
}
|
|
toRemove = append(toRemove, image.Point{x, y})
|
|
}
|
|
}
|
|
for _, p := range toRemove {
|
|
data[p.Y][p.X] = 0
|
|
changed = true
|
|
}
|
|
|
|
// Sub-iteration 2
|
|
toRemove = nil
|
|
for y := 1; y < rows-1; y++ {
|
|
for x := 1; x < cols-1; x++ {
|
|
if data[y][x] != 1 {
|
|
continue
|
|
}
|
|
n := neighbors(y, x)
|
|
if n < 2 || n > 6 {
|
|
continue
|
|
}
|
|
if transitions(y, x) != 1 {
|
|
continue
|
|
}
|
|
// P2 * P4 * P8 == 0
|
|
if getP(y, x, 0)*getP(y, x, 2)*getP(y, x, 6) != 0 {
|
|
continue
|
|
}
|
|
// P2 * P6 * P8 == 0
|
|
if getP(y, x, 0)*getP(y, x, 4)*getP(y, x, 6) != 0 {
|
|
continue
|
|
}
|
|
toRemove = append(toRemove, image.Point{x, y})
|
|
}
|
|
}
|
|
for _, p := range toRemove {
|
|
data[p.Y][p.X] = 0
|
|
changed = true
|
|
}
|
|
}
|
|
|
|
// Convert back to Mat
|
|
result := gocv.NewMatWithSize(rows, cols, gocv.MatTypeCV8U)
|
|
for y := 0; y < rows; y++ {
|
|
for x := 0; x < cols; x++ {
|
|
if data[y][x] == 1 {
|
|
result.SetUCharAt(y, x, 255)
|
|
}
|
|
}
|
|
}
|
|
return result
|
|
}
|
|
|
|
// traceSkeletonStrokes traces the skeleton and returns stroke directions
|
|
// Returns directions like "RDLU" (Right, Down, Left, Up) for a "9"
|
|
func traceSkeletonStrokes(skeleton gocv.Mat) string {
|
|
rows, cols := skeleton.Rows(), skeleton.Cols()
|
|
|
|
// Find all skeleton pixels
|
|
var points []image.Point
|
|
for y := 0; y < rows; y++ {
|
|
for x := 0; x < cols; x++ {
|
|
if skeleton.GetUCharAt(y, x) > 0 {
|
|
points = append(points, image.Point{x, y})
|
|
}
|
|
}
|
|
}
|
|
|
|
if len(points) == 0 {
|
|
return ""
|
|
}
|
|
|
|
// Build adjacency - 8-connected neighbors
|
|
isSet := func(x, y int) bool {
|
|
if x < 0 || x >= cols || y < 0 || y >= rows {
|
|
return false
|
|
}
|
|
return skeleton.GetUCharAt(y, x) > 0
|
|
}
|
|
|
|
neighbors := func(p image.Point) []image.Point {
|
|
var n []image.Point
|
|
for dy := -1; dy <= 1; dy++ {
|
|
for dx := -1; dx <= 1; dx++ {
|
|
if dx == 0 && dy == 0 {
|
|
continue
|
|
}
|
|
if isSet(p.X+dx, p.Y+dy) {
|
|
n = append(n, image.Point{p.X + dx, p.Y + dy})
|
|
}
|
|
}
|
|
}
|
|
return n
|
|
}
|
|
|
|
// Find endpoint (pixel with exactly 1 neighbor) - prefer topmost, then leftmost
|
|
var startPoint image.Point
|
|
foundEndpoint := false
|
|
for _, p := range points {
|
|
if len(neighbors(p)) == 1 {
|
|
if !foundEndpoint || p.Y < startPoint.Y || (p.Y == startPoint.Y && p.X < startPoint.X) {
|
|
startPoint = p
|
|
foundEndpoint = true
|
|
}
|
|
}
|
|
}
|
|
|
|
// If no endpoint (closed loop), start from topmost-leftmost point
|
|
if !foundEndpoint {
|
|
startPoint = points[0]
|
|
for _, p := range points {
|
|
if p.Y < startPoint.Y || (p.Y == startPoint.Y && p.X < startPoint.X) {
|
|
startPoint = p
|
|
}
|
|
}
|
|
}
|
|
|
|
// Trace the skeleton from startPoint
|
|
visited := make(map[image.Point]bool)
|
|
var path []image.Point
|
|
|
|
var trace func(p image.Point)
|
|
trace = func(p image.Point) {
|
|
if visited[p] {
|
|
return
|
|
}
|
|
visited[p] = true
|
|
path = append(path, p)
|
|
|
|
for _, n := range neighbors(p) {
|
|
if !visited[n] {
|
|
trace(n)
|
|
}
|
|
}
|
|
}
|
|
trace(startPoint)
|
|
|
|
// Convert path to directions - only emit when moved 10+ pixels from checkpoint
|
|
if len(path) < 2 {
|
|
return ""
|
|
}
|
|
|
|
const threshold = 20
|
|
var result strings.Builder
|
|
var lastDir string
|
|
checkX, checkY := path[0].X, path[0].Y
|
|
|
|
for _, p := range path {
|
|
dx := p.X - checkX
|
|
dy := p.Y - checkY
|
|
|
|
// Determine dominant direction if we've moved enough
|
|
var dir string
|
|
if abs(dx) >= threshold || abs(dy) >= threshold {
|
|
// Pick the dominant direction
|
|
if abs(dy) > abs(dx) {
|
|
if dy > 0 {
|
|
dir = "D"
|
|
} else {
|
|
dir = "U"
|
|
}
|
|
} else {
|
|
if dx > 0 {
|
|
dir = "R"
|
|
} else {
|
|
dir = "L"
|
|
}
|
|
}
|
|
// Reset checkpoint to current position
|
|
checkX, checkY = p.X, p.Y
|
|
}
|
|
|
|
if dir != "" && dir != lastDir {
|
|
result.WriteString(dir)
|
|
lastDir = dir
|
|
}
|
|
}
|
|
|
|
return result.String()
|
|
}
|
|
|
|
// analyzeHalf analyzes a half of a digit skeleton and returns shape descriptors for left/middle/right
|
|
// Returns a 3-char string like "|-|" (vertical, horizontal, vertical) or "|^|" (vertical, arch, vertical)
|
|
func analyzeHalf(skeleton gocv.Mat) string {
|
|
rows, cols := skeleton.Rows(), skeleton.Cols()
|
|
if rows < 3 || cols < 3 {
|
|
return "..."
|
|
}
|
|
|
|
// Divide into left/middle/right thirds
|
|
third := cols / 3
|
|
|
|
// Count pixels in each vertical slice and their distribution
|
|
analyzeThird := func(startX, endX int) rune {
|
|
var pixels []image.Point
|
|
for y := 0; y < rows; y++ {
|
|
for x := startX; x < endX; x++ {
|
|
if skeleton.GetUCharAt(y, x) > 0 {
|
|
pixels = append(pixels, image.Point{x, y})
|
|
}
|
|
}
|
|
}
|
|
|
|
if len(pixels) < 2 {
|
|
return '.' // empty
|
|
}
|
|
|
|
// Find Y span and X span
|
|
minY, maxY := rows, 0
|
|
minX, maxX := cols, 0
|
|
for _, p := range pixels {
|
|
if p.Y < minY { minY = p.Y }
|
|
if p.Y > maxY { maxY = p.Y }
|
|
if p.X < minX { minX = p.X }
|
|
if p.X > maxX { maxX = p.X }
|
|
}
|
|
|
|
ySpan := maxY - minY
|
|
xSpan := maxX - minX
|
|
|
|
// Vertical stroke: tall and narrow
|
|
if ySpan > rows/2 && xSpan < cols/4 {
|
|
return '|'
|
|
}
|
|
|
|
// Horizontal stroke: wide and short
|
|
if xSpan > (endX-startX)/2 && ySpan < rows/3 {
|
|
return '-'
|
|
}
|
|
|
|
// Has content but not clearly vertical or horizontal
|
|
return 'o' // curve/other
|
|
}
|
|
|
|
// For middle section, also detect arch vs bowl
|
|
analyzeMiddle := func() rune {
|
|
startX, endX := third, cols-third
|
|
var pixels []image.Point
|
|
for y := 0; y < rows; y++ {
|
|
for x := startX; x < endX; x++ {
|
|
if skeleton.GetUCharAt(y, x) > 0 {
|
|
pixels = append(pixels, image.Point{x, y})
|
|
}
|
|
}
|
|
}
|
|
|
|
if len(pixels) < 2 {
|
|
return '.' // empty
|
|
}
|
|
|
|
// Check if pixels are concentrated at top (arch) or bottom (bowl)
|
|
topCount, bottomCount := 0, 0
|
|
midY := rows / 2
|
|
for _, p := range pixels {
|
|
if p.Y < midY {
|
|
topCount++
|
|
} else {
|
|
bottomCount++
|
|
}
|
|
}
|
|
|
|
// Find horizontal span
|
|
minX, maxX := cols, 0
|
|
minY, maxY := rows, 0
|
|
for _, p := range pixels {
|
|
if p.X < minX { minX = p.X }
|
|
if p.X > maxX { maxX = p.X }
|
|
if p.Y < minY { minY = p.Y }
|
|
if p.Y > maxY { maxY = p.Y }
|
|
}
|
|
xSpan := maxX - minX
|
|
ySpan := maxY - minY
|
|
|
|
// Horizontal line
|
|
if xSpan > (endX-startX)/2 && ySpan < rows/3 {
|
|
return '-'
|
|
}
|
|
|
|
// Arch (pixels at top, spans width)
|
|
if topCount > bottomCount*2 && xSpan > (endX-startX)/3 {
|
|
return '^'
|
|
}
|
|
|
|
// Bowl (pixels at bottom, spans width)
|
|
if bottomCount > topCount*2 && xSpan > (endX-startX)/3 {
|
|
return 'u'
|
|
}
|
|
|
|
return 'o' // other/curve
|
|
}
|
|
|
|
left := analyzeThird(0, third)
|
|
middle := analyzeMiddle()
|
|
right := analyzeThird(cols-third, cols)
|
|
|
|
return string([]rune{left, middle, right})
|
|
}
|
|
|
|
// classifyDigit analyzes a digit skeleton and returns a fingerprint based on top/bottom half shapes
|
|
func classifyDigit(skeleton gocv.Mat) string {
|
|
rows := skeleton.Rows()
|
|
if rows < 6 {
|
|
return ""
|
|
}
|
|
|
|
// Split into top and bottom halves
|
|
midY := rows / 2
|
|
topHalf := skeleton.Region(image.Rect(0, 0, skeleton.Cols(), midY))
|
|
bottomHalf := skeleton.Region(image.Rect(0, midY, skeleton.Cols(), rows))
|
|
|
|
topPattern := analyzeHalf(topHalf)
|
|
bottomPattern := analyzeHalf(bottomHalf)
|
|
|
|
topHalf.Close()
|
|
bottomHalf.Close()
|
|
|
|
return topPattern + "/" + bottomPattern
|
|
}
|
|
|
|
// isVerticalStroke checks if a skeleton is just a simple vertical stroke (for detecting "1")
|
|
func isVerticalStroke(skeleton gocv.Mat) bool {
|
|
rows, cols := skeleton.Rows(), skeleton.Cols()
|
|
if rows < 5 {
|
|
return false
|
|
}
|
|
|
|
// Count pixels and check distribution
|
|
var pixels []image.Point
|
|
for y := 0; y < rows; y++ {
|
|
for x := 0; x < cols; x++ {
|
|
if skeleton.GetUCharAt(y, x) > 0 {
|
|
pixels = append(pixels, image.Point{x, y})
|
|
}
|
|
}
|
|
}
|
|
|
|
if len(pixels) < 3 {
|
|
return false
|
|
}
|
|
|
|
// Check X variance - should be small for vertical stroke
|
|
minX, maxX := cols, 0
|
|
minY, maxY := rows, 0
|
|
for _, p := range pixels {
|
|
if p.X < minX { minX = p.X }
|
|
if p.X > maxX { maxX = p.X }
|
|
if p.Y < minY { minY = p.Y }
|
|
if p.Y > maxY { maxY = p.Y }
|
|
}
|
|
|
|
xSpan := maxX - minX
|
|
ySpan := maxY - minY
|
|
|
|
// Vertical: tall and narrow
|
|
return ySpan > rows/2 && xSpan < cols/3
|
|
}
|
|
|
|
// getDigitBounds finds bounding box of components reaching top 20% (the actual digits, not noise)
|
|
func getDigitBounds(img gocv.Mat) image.Rectangle {
|
|
rows, cols := img.Rows(), img.Cols()
|
|
topThreshold := rows * 20 / 100
|
|
|
|
// Find connected components on raw image
|
|
labels := gocv.NewMat()
|
|
numLabels := gocv.ConnectedComponents(img, &labels)
|
|
defer labels.Close()
|
|
|
|
// Check which labels reach the top
|
|
reachesTop := make([]bool, numLabels)
|
|
for y := 0; y < topThreshold; y++ {
|
|
for x := 0; x < cols; x++ {
|
|
label := int(labels.GetIntAt(y, x))
|
|
if label > 0 {
|
|
reachesTop[label] = true
|
|
}
|
|
}
|
|
}
|
|
|
|
// Find bounding box of all top-reaching components
|
|
minX, minY := cols, rows
|
|
maxX, maxY := 0, 0
|
|
found := false
|
|
for y := 0; y < rows; y++ {
|
|
for x := 0; x < cols; x++ {
|
|
label := int(labels.GetIntAt(y, x))
|
|
if label > 0 && reachesTop[label] {
|
|
found = true
|
|
if x < minX { minX = x }
|
|
if x > maxX { maxX = x }
|
|
if y < minY { minY = y }
|
|
if y > maxY { maxY = y }
|
|
}
|
|
}
|
|
}
|
|
|
|
if !found {
|
|
return image.Rect(0, 0, cols, rows) // fallback to full image
|
|
}
|
|
|
|
// Add padding on all sides to avoid cutting off edges
|
|
pad := 3
|
|
minX = max(0, minX-pad)
|
|
minY = max(0, minY-pad)
|
|
maxX = min(cols, maxX+pad)
|
|
maxY = min(rows, maxY+pad)
|
|
|
|
return image.Rect(minX, minY, maxX, maxY)
|
|
}
|
|
|
|
// max returns the larger of a and b.
func max(a, b int) int {
	if b > a {
		return b
	}
	return a
}
|
|
|
|
// HoleInfo describes one enclosed region (hole) found inside a digit image.
type HoleInfo struct {
	CenterX int // X coordinate of the hole's center
	CenterY int // Y coordinate of the hole's center
	Area    int // number of pixels belonging to the hole
	BoundsW int // bounding-box width in pixels
	BoundsH int // bounding-box height in pixels
}
|
|
|
|
// findHoles finds enclosed regions (holes) and returns their properties
|
|
// Uses connected components on inverted image, excluding border-touching regions
|
|
func findHoles(img gocv.Mat) []HoleInfo {
|
|
rows, cols := img.Rows(), img.Cols()
|
|
if rows < 3 || cols < 3 {
|
|
return nil
|
|
}
|
|
|
|
// Invert image (holes become white)
|
|
inverted := gocv.NewMat()
|
|
gocv.BitwiseNot(img, &inverted)
|
|
defer inverted.Close()
|
|
|
|
// Find connected components
|
|
labels := gocv.NewMat()
|
|
numLabels := gocv.ConnectedComponents(inverted, &labels)
|
|
defer labels.Close()
|
|
|
|
if numLabels <= 1 {
|
|
return nil
|
|
}
|
|
|
|
// Check which labels touch the border (those are background, not holes)
|
|
touchesBorder := make([]bool, numLabels)
|
|
for x := 0; x < cols; x++ {
|
|
touchesBorder[int(labels.GetIntAt(0, x))] = true
|
|
touchesBorder[int(labels.GetIntAt(rows-1, x))] = true
|
|
}
|
|
for y := 0; y < rows; y++ {
|
|
touchesBorder[int(labels.GetIntAt(y, 0))] = true
|
|
touchesBorder[int(labels.GetIntAt(y, cols-1))] = true
|
|
}
|
|
|
|
// Analyze each hole
|
|
var holes []HoleInfo
|
|
for label := 1; label < numLabels; label++ {
|
|
if touchesBorder[label] {
|
|
continue
|
|
}
|
|
|
|
// Find bounds and count pixels
|
|
minX, maxX := cols, 0
|
|
minY, maxY := rows, 0
|
|
sumX, sumY, count := 0, 0, 0
|
|
|
|
for y := 0; y < rows; y++ {
|
|
for x := 0; x < cols; x++ {
|
|
if int(labels.GetIntAt(y, x)) == label {
|
|
if x < minX { minX = x }
|
|
if x > maxX { maxX = x }
|
|
if y < minY { minY = y }
|
|
if y > maxY { maxY = y }
|
|
sumX += x
|
|
sumY += y
|
|
count++
|
|
}
|
|
}
|
|
}
|
|
|
|
if count < 4 { // ignore tiny specks
|
|
continue
|
|
}
|
|
|
|
holes = append(holes, HoleInfo{
|
|
CenterX: sumX / count,
|
|
CenterY: sumY / count,
|
|
Area: count,
|
|
BoundsW: maxX - minX + 1,
|
|
BoundsH: maxY - minY + 1,
|
|
})
|
|
}
|
|
|
|
return holes
|
|
}
|
|
|
|
// WhiteRun is a contiguous stretch of white pixels on one scan line, stored
// as a half-open column interval [Start, End).
type WhiteRun struct {
	Start int // first column of the run
	End   int // column just past the last pixel of the run
}

// Width returns the number of columns covered by the run.
func (r WhiteRun) Width() int {
	return r.End - r.Start
}

// Mid returns the column halfway between Start and End (integer division).
func (r WhiteRun) Mid() int {
	sum := r.Start + r.End
	return sum / 2
}
|
|
|
|
// findWhiteRuns scans a row and returns all white runs
|
|
// Merges runs separated by tiny gaps (< 3px) which are likely rendering artifacts
|
|
func findWhiteRuns(img gocv.Mat, y int) []WhiteRun {
|
|
cols := img.Cols()
|
|
var runs []WhiteRun
|
|
|
|
inWhite := false
|
|
runStart := 0
|
|
|
|
for x := 0; x <= cols; x++ {
|
|
isWhite := x < cols && img.GetUCharAt(y, x) > 128
|
|
if isWhite && !inWhite {
|
|
runStart = x
|
|
inWhite = true
|
|
} else if !isWhite && inWhite {
|
|
runs = append(runs, WhiteRun{runStart, x})
|
|
inWhite = false
|
|
}
|
|
}
|
|
|
|
// Merge runs separated by tiny gaps (< 3 pixels)
|
|
if len(runs) > 1 {
|
|
merged := []WhiteRun{runs[0]}
|
|
for i := 1; i < len(runs); i++ {
|
|
gap := runs[i].Start - merged[len(merged)-1].End
|
|
if gap < 3 {
|
|
// Merge with previous run
|
|
merged[len(merged)-1].End = runs[i].End
|
|
} else {
|
|
merged = append(merged, runs[i])
|
|
}
|
|
}
|
|
runs = merged
|
|
}
|
|
|
|
return runs
|
|
}
|
|
|
|
// recognizeNumber uses scan-line analysis to find and classify digits
|
|
// Strategy: Find "1"s first (narrow at both scan lines), then divide rest equally
|
|
func recognizeNumber(img gocv.Mat) string {
|
|
rows, cols := img.Rows(), img.Cols()
|
|
if rows < 10 || cols < 5 {
|
|
return ""
|
|
}
|
|
|
|
y2 := rows * 2 / 5 // upper scan
|
|
y4 := rows * 4 / 5 // lower scan
|
|
|
|
runs2 := findWhiteRuns(img, y2)
|
|
runs4 := findWhiteRuns(img, y4)
|
|
|
|
// Debug
|
|
fmt.Printf(" y=%d: ", y2)
|
|
for _, r := range runs2 {
|
|
fmt.Printf("[%d-%d](%d) ", r.Start, r.End, r.Width())
|
|
}
|
|
fmt.Printf("\n y=%d: ", y4)
|
|
for _, r := range runs4 {
|
|
fmt.Printf("[%d-%d](%d) ", r.Start, r.End, r.Width())
|
|
}
|
|
fmt.Println()
|
|
|
|
// Find "1"s: narrow runs at both scan lines at similar X position
|
|
// A "1" is narrow (<20px) at both levels
|
|
type OneDigit struct {
|
|
X, W int // position and width
|
|
}
|
|
var ones []OneDigit
|
|
|
|
for _, r2 := range runs2 {
|
|
if r2.Width() > 20 {
|
|
continue // not narrow enough for "1"
|
|
}
|
|
// Find matching narrow run at y4
|
|
for _, r4 := range runs4 {
|
|
if r4.Width() > 20 {
|
|
continue
|
|
}
|
|
// Check if they overlap in X (same digit)
|
|
overlap := min(r2.End, r4.End) - max(r2.Start, r4.Start)
|
|
if overlap > 5 {
|
|
// Found a "1"
|
|
x := min(r2.Start, r4.Start)
|
|
w := max(r2.End, r4.End) - x
|
|
ones = append(ones, OneDigit{x, w})
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
// Find content bounds (first and last white pixel columns)
|
|
contentStart, contentEnd := cols, 0
|
|
for _, r := range runs2 {
|
|
if r.Start < contentStart {
|
|
contentStart = r.Start
|
|
}
|
|
if r.End > contentEnd {
|
|
contentEnd = r.End
|
|
}
|
|
}
|
|
for _, r := range runs4 {
|
|
if r.Start < contentStart {
|
|
contentStart = r.Start
|
|
}
|
|
if r.End > contentEnd {
|
|
contentEnd = r.End
|
|
}
|
|
}
|
|
|
|
// Build digit list
|
|
type Digit struct {
|
|
Start, End int
|
|
IsOne bool
|
|
Runs2, Runs4 []WhiteRun
|
|
}
|
|
var digits []Digit
|
|
|
|
// Sort ones by X
|
|
for i := 0; i < len(ones); i++ {
|
|
for j := i + 1; j < len(ones); j++ {
|
|
if ones[j].X < ones[i].X {
|
|
ones[i], ones[j] = ones[j], ones[i]
|
|
}
|
|
}
|
|
}
|
|
|
|
// Find digit boundaries using GAPS at BOTH scan lines
|
|
// A gap >15px at EITHER line suggests a digit boundary
|
|
gapThreshold := 15
|
|
|
|
// Special case: if y=58 (4/5) has one merged run >80px but y=29 (2/5) has multiple runs,
|
|
// the digits are touching at the bottom. Use a smarter approach for y=29 gaps.
|
|
y4Merged := len(runs4) == 1 && runs4[0].Width() > 80
|
|
|
|
// Collect split points from both scan lines
|
|
splitSet := make(map[int]bool)
|
|
|
|
for i := 1; i < len(runs2); i++ {
|
|
gap := runs2[i].Start - runs2[i-1].End
|
|
threshold := gapThreshold
|
|
|
|
if y4Merged && len(runs2) >= 3 {
|
|
// When digits merge at bottom, use width analysis:
|
|
// A narrow run following a gap is likely "other side of hole", not a new digit
|
|
// Don't split before narrow runs (they're continuations of previous digit)
|
|
prevWidth := runs2[i-1].Width()
|
|
nextWidth := runs2[i].Width()
|
|
|
|
if nextWidth < prevWidth*2/3 {
|
|
// Next run is significantly narrower - likely part of same digit (hole)
|
|
continue // Skip this gap
|
|
}
|
|
threshold = 8 // Lower threshold for digit boundaries
|
|
}
|
|
|
|
if gap > threshold {
|
|
splitSet[(runs2[i-1].End+runs2[i].Start)/2] = true
|
|
}
|
|
}
|
|
for i := 1; i < len(runs4); i++ {
|
|
gap := runs4[i].Start - runs4[i-1].End
|
|
// Use lower threshold for y=58 gaps - they often represent real digit boundaries
|
|
// even when the gap is small (like between "9" and "2")
|
|
if gap > 2 {
|
|
splitSet[(runs4[i-1].End+runs4[i].Start)/2] = true
|
|
}
|
|
}
|
|
|
|
// Convert to sorted list
|
|
var splits []int
|
|
for sp := range splitSet {
|
|
splits = append(splits, sp)
|
|
}
|
|
for i := 0; i < len(splits); i++ {
|
|
for j := i + 1; j < len(splits); j++ {
|
|
if splits[j] < splits[i] {
|
|
splits[i], splits[j] = splits[j], splits[i]
|
|
}
|
|
}
|
|
}
|
|
|
|
// Remove splits that are too close together (<20px)
|
|
var filteredSplits []int
|
|
for _, sp := range splits {
|
|
if len(filteredSplits) == 0 || sp-filteredSplits[len(filteredSplits)-1] > 20 {
|
|
filteredSplits = append(filteredSplits, sp)
|
|
}
|
|
}
|
|
splits = filteredSplits
|
|
|
|
// Create digit boundaries
|
|
totalWidth := contentEnd - contentStart
|
|
pos := contentStart
|
|
for _, sp := range splits {
|
|
if sp > pos && sp < contentEnd {
|
|
digits = append(digits, Digit{Start: pos, End: sp})
|
|
pos = sp
|
|
}
|
|
}
|
|
digits = append(digits, Digit{Start: pos, End: contentEnd})
|
|
|
|
// SIMPLIFIED APPROACH: Numbers are either 2 digits OR 3 digits starting with "1"
|
|
// Detect leading "1" using multiple strategies:
|
|
// 1. Narrow region at BOTH scan lines followed by gap at both (ideal case)
|
|
// 2. Narrow first run at y=58 followed by gap (when "1" merges with next digit at y=29)
|
|
|
|
hasLeadingOne := false
|
|
oneEndPos := contentStart
|
|
|
|
// Strategy 1: Scan columns looking for narrow both-white region followed by both-dark gap
|
|
scanLimit := contentStart + totalWidth*3/10
|
|
inWhite := false
|
|
whiteStart := 0
|
|
|
|
for x := contentStart; x < scanLimit; x++ {
|
|
isWhite := img.GetUCharAt(y2, x) > 128 && img.GetUCharAt(y4, x) > 128
|
|
|
|
if isWhite && !inWhite {
|
|
whiteStart = x
|
|
inWhite = true
|
|
} else if !isWhite && inWhite {
|
|
whiteWidth := x - whiteStart
|
|
if whiteWidth > 0 && whiteWidth < totalWidth/4 {
|
|
minGapWidth := totalWidth / 20
|
|
trueGapWidth := 0
|
|
for x2 := x; x2 < contentEnd; x2++ {
|
|
bothDark := img.GetUCharAt(y2, x2) <= 128 && img.GetUCharAt(y4, x2) <= 128
|
|
if bothDark {
|
|
trueGapWidth++
|
|
} else {
|
|
break
|
|
}
|
|
}
|
|
if trueGapWidth >= minGapWidth {
|
|
hasLeadingOne = true
|
|
oneEndPos = x
|
|
}
|
|
}
|
|
break
|
|
}
|
|
}
|
|
|
|
// Strategy 2: If not found, check if first run at y=58 is narrow with gap after
|
|
// This catches "1" that merges with next digit at y=29 but separates at y=58
|
|
// IMPORTANT: Also verify y=29 has a narrow first run - this distinguishes "1" from "9"
|
|
// "9" has narrow stem at y=58 but WIDE top with hole at y=29
|
|
// "1" is narrow at BOTH scan lines
|
|
if !hasLeadingOne && len(runs4) >= 2 && len(runs2) >= 1 {
|
|
firstRun4 := runs4[0]
|
|
firstRunWidth4 := firstRun4.Width()
|
|
gapAfterFirst := runs4[1].Start - firstRun4.End
|
|
|
|
// Check y=29 first run is also narrow (< 25% of total)
|
|
// "1" should be narrow at both lines; "9" has wide top
|
|
firstRun2 := runs2[0]
|
|
firstRunWidth2 := firstRun2.Width()
|
|
|
|
// "1" at y=58 is narrow (< 20% of total) and positioned in first third
|
|
// Relaxed position check to 33% to handle merged displays
|
|
if firstRunWidth4 < totalWidth/5 && firstRun4.End < contentStart+totalWidth/3 {
|
|
// Additional check: y=29 first run must also be narrow (< 30% of total)
|
|
// This prevents "9" (which has wide top) from being detected as "1"
|
|
// Relaxed from 25% to 30% because "1" can merge slightly with next digit
|
|
if firstRunWidth2 < totalWidth*3/10 {
|
|
if gapAfterFirst > totalWidth/30 || len(runs4) >= 3 {
|
|
hasLeadingOne = true
|
|
// Use second run's start as boundary (where digit 2 begins)
|
|
oneEndPos = runs4[1].Start
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Strategy 3: Detect 3-digit numbers when "1" and "0" merge at y=58
|
|
// This happens when the "1" stem connects with the "0" at the bottom
|
|
// Indicators: wide total content (>100px), 3+ runs at y=29, first y=58 run is very wide
|
|
if !hasLeadingOne && totalWidth > 100 && len(runs2) >= 3 && len(runs4) >= 2 {
|
|
// For "104": y=58 shows [1+0 merged ~50%] [4 ~35%]
|
|
// The first run being very wide (> 40% of total) suggests merged digits
|
|
firstRun4 := runs4[0]
|
|
firstRunWidth := firstRun4.Width()
|
|
|
|
// If first run is very wide (> 40% of content), it's likely "1"+"0" merged
|
|
// This distinguishes from 2-digit numbers where each digit is ~50%
|
|
if firstRunWidth > totalWidth*40/100 && firstRunWidth < totalWidth*65/100 {
|
|
hasLeadingOne = true
|
|
// For merged "1"+"0", split at 1/3 of total width for the "1"
|
|
oneEndPos = contentStart + totalWidth/3
|
|
}
|
|
}
|
|
|
|
// Build digit list based on detection
|
|
digits = nil
|
|
if hasLeadingOne {
|
|
// 3 digits: "1" + 2 more digits
|
|
// For 3 runs at y=58: run0="1", run1="0", run2="2" (each run = one digit)
|
|
// Use run starts as digit boundaries
|
|
digits = append(digits, Digit{Start: contentStart, End: oneEndPos, IsOne: true})
|
|
|
|
secondEnd := (oneEndPos + contentEnd) / 2 // fallback
|
|
if len(runs4) >= 3 {
|
|
// Digit 2 ends where digit 3's run begins
|
|
secondEnd = runs4[2].Start
|
|
} else if len(runs4) == 2 {
|
|
// Strategy 3 case: 2 runs at y=58 where first is merged "1"+"0"
|
|
// Use last run's start as boundary (that's where digit 3 begins)
|
|
lastRunStart := runs4[1].Start
|
|
// Only use if it's in the right 40% of the content (digit 3 area)
|
|
if lastRunStart > contentStart+totalWidth*55/100 {
|
|
secondEnd = lastRunStart
|
|
}
|
|
}
|
|
|
|
digits = append(digits, Digit{Start: oneEndPos, End: secondEnd})
|
|
digits = append(digits, Digit{Start: secondEnd, End: contentEnd})
|
|
} else {
|
|
// 2 digits - split at the LARGEST gap at y=51 that's in the middle region (30-70%)
|
|
// This is the digit boundary; gaps outside this range are likely artifacts
|
|
if len(runs4) >= 2 {
|
|
// Find the largest gap between consecutive runs that's in the middle region
|
|
maxGap := 0
|
|
mid := (contentStart + contentEnd) / 2
|
|
splitPos := mid // fallback to center
|
|
minSplit := contentStart + totalWidth*30/100
|
|
maxSplit := contentStart + totalWidth*70/100
|
|
|
|
for i := 1; i < len(runs4); i++ {
|
|
gapMid := (runs4[i-1].End + runs4[i].Start) / 2
|
|
// Only consider gaps in the middle region
|
|
if gapMid >= minSplit && gapMid <= maxSplit {
|
|
gap := runs4[i].Start - runs4[i-1].End
|
|
if gap > maxGap {
|
|
maxGap = gap
|
|
splitPos = gapMid
|
|
}
|
|
}
|
|
}
|
|
digits = append(digits, Digit{Start: contentStart, End: splitPos})
|
|
digits = append(digits, Digit{Start: splitPos, End: contentEnd})
|
|
} else {
|
|
// Equal split
|
|
mid := (contentStart + contentEnd) / 2
|
|
digits = append(digits, Digit{Start: contentStart, End: mid})
|
|
digits = append(digits, Digit{Start: mid, End: contentEnd})
|
|
}
|
|
}
|
|
|
|
// NOTE: Previously we checked for detected "1"s and marked digits as IsOne.
|
|
// This caused false positives when narrow runs from other digits (like "4", "5")
|
|
// were mistakenly identified as "1"s. Now we rely solely on the leading "1"
|
|
// detection strategies (1/2/3) which only mark the FIRST digit as IsOne when
|
|
// it's a 3-digit number starting with "1". The scan-line classification
|
|
// handles all other "1" detection.
|
|
|
|
// Assign runs to digits with CLIPPING to digit boundaries
|
|
// This handles runs that span multiple digits (e.g., merged "1" + "0")
|
|
// Convert run coordinates to be RELATIVE to digit start
|
|
// Use different thresholds: y=29 needs 8px (avoid boundary artifacts),
|
|
// y=58 needs only 4px (to catch narrow hole edges like right side of "0")
|
|
minRunWidth2 := 8 // for y=29
|
|
minRunWidth4 := 4 // for y=58
|
|
|
|
for i := range digits {
|
|
d := &digits[i]
|
|
|
|
for _, r := range runs2 {
|
|
overlapStart := max(r.Start, d.Start)
|
|
overlapEnd := min(r.End, d.End)
|
|
if overlapEnd-overlapStart >= minRunWidth2 {
|
|
relRun := WhiteRun{Start: overlapStart - d.Start, End: overlapEnd - d.Start}
|
|
d.Runs2 = append(d.Runs2, relRun)
|
|
}
|
|
}
|
|
|
|
for _, r := range runs4 {
|
|
overlapStart := max(r.Start, d.Start)
|
|
overlapEnd := min(r.End, d.End)
|
|
if overlapEnd-overlapStart >= minRunWidth4 {
|
|
relRun := WhiteRun{Start: overlapStart - d.Start, End: overlapEnd - d.Start}
|
|
d.Runs4 = append(d.Runs4, relRun)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Debug image
|
|
vis := gocv.NewMat()
|
|
gocv.CvtColor(img, &vis, gocv.ColorGrayToBGR)
|
|
gocv.Line(&vis, image.Point{0, y2}, image.Point{cols, y2}, color.RGBA{255, 255, 0, 255}, 1)
|
|
gocv.Line(&vis, image.Point{0, y4}, image.Point{cols, y4}, color.RGBA{255, 0, 255, 255}, 1)
|
|
for _, d := range digits {
|
|
c := color.RGBA{0, 255, 0, 255}
|
|
if d.IsOne {
|
|
c = color.RGBA{255, 0, 0, 255} // red for "1"s
|
|
}
|
|
gocv.Line(&vis, image.Point{d.Start, 0}, image.Point{d.Start, rows}, c, 1)
|
|
gocv.Line(&vis, image.Point{d.End, 0}, image.Point{d.End, rows}, c, 1)
|
|
}
|
|
if cols < 100 {
|
|
gocv.IMWrite("debug_cuts_left.png", vis)
|
|
} else {
|
|
gocv.IMWrite("debug_cuts_right.png", vis)
|
|
}
|
|
vis.Close()
|
|
|
|
// Determine side for labeling
|
|
side := "left"
|
|
if cols > 100 {
|
|
side = "right"
|
|
}
|
|
|
|
// Classify each digit
|
|
var result string
|
|
for i, d := range digits {
|
|
var digit string
|
|
if d.IsOne {
|
|
digit = "1"
|
|
} else {
|
|
// Calculate fingerprint
|
|
white2 := 0
|
|
for _, r := range d.Runs2 {
|
|
white2 += r.Width()
|
|
}
|
|
white4 := 0
|
|
for _, r := range d.Runs4 {
|
|
white4 += r.Width()
|
|
}
|
|
width := d.End - d.Start
|
|
p2 := ((white2 * 100 / width) + 2) / 5 * 5
|
|
p4 := ((white4 * 100 / width) + 2) / 5 * 5
|
|
fingerprint := fmt.Sprintf("%d_%d", p2, p4)
|
|
|
|
// Check pattern store first
|
|
if patternStore != nil {
|
|
if val, ok := patternStore.Lookup(fingerprint); ok {
|
|
if val >= 0 && val <= 9 {
|
|
digit = fmt.Sprintf("%d", val)
|
|
fmt.Printf(" [%d-%d] fp=%s -> %s (learned)\n", d.Start, d.End, fingerprint, digit)
|
|
} else {
|
|
// val == -1 means ignore/unknown
|
|
digit = "?"
|
|
fmt.Printf(" [%d-%d] fp=%s -> ? (marked ignore)\n", d.Start, d.End, fingerprint)
|
|
}
|
|
}
|
|
}
|
|
|
|
// If not found in store, save for labeling and use best guess
|
|
if digit == "" {
|
|
// Save digit image for labeling
|
|
digitImg := img.Region(image.Rect(d.Start, 0, d.End, rows))
|
|
imgPath := fmt.Sprintf("digit_%s_%d_%s.png", side, i, fingerprint)
|
|
gocv.IMWrite(imgPath, digitImg)
|
|
|
|
// Add to unlabeled queue
|
|
AddUnlabeled(fingerprint, imgPath, side)
|
|
|
|
// Use fingerprint matcher as fallback
|
|
digit = classifyByFingerprint(p2, p4)
|
|
fmt.Printf(" [%d-%d] fp=%s -> %s (guessed, queued for labeling)\n", d.Start, d.End, fingerprint, digit)
|
|
}
|
|
}
|
|
|
|
fmt.Printf(" [%d-%d] runs2=%d runs4=%d -> %s\n",
|
|
d.Start, d.End, len(d.Runs2), len(d.Runs4), digit)
|
|
if digit == "" || digit == "?" {
|
|
return ""
|
|
}
|
|
result += digit
|
|
}
|
|
|
|
return result
|
|
}
|
|
|
|
// classifyByFingerprint uses p2/p4 percentages to guess the digit
|
|
// This is a fallback when the pattern isn't in the learned store
|
|
func classifyByFingerprint(p2, p4 int) string {
|
|
// Fingerprint table: [p2, p4] -> digit
|
|
// Based on 7-segment display patterns:
|
|
// _ At 2/5: middle area (left+right segments or holes)
|
|
// |_| At 4/5: lower area (bottom segments)
|
|
// |_|
|
|
//
|
|
// Approximate expected percentages (will need calibration):
|
|
// "0": ~40-50% at both (left+right segments, no middle bar visible)
|
|
// "1": ~15-25% at both (just right stem)
|
|
// "2": ~50-60% at 2/5 (top curve), ~70-80% at 4/5 (bottom bar)
|
|
// "3": ~50-60% at both (curves on right)
|
|
// "4": ~50-60% at 2/5 (left+right+middle), ~20-30% at 4/5 (just right stem)
|
|
// "5": ~50-60% at 2/5 (top+left), ~40-50% at 4/5 (bottom curve)
|
|
// "6": ~25-35% at 2/5 (left stem), ~60-70% at 4/5 (bottom loop)
|
|
// "7": ~50-60% at 2/5 (top bar), ~20-30% at 4/5 (diagonal stem)
|
|
// "8": ~40-50% at both (holes visible at both)
|
|
// "9": ~40-50% at 2/5 (top loop), ~40-50% at 4/5 (stem + bottom)
|
|
|
|
// Match by finding closest pattern
|
|
// Format: {p2, p4, digit}
|
|
patterns := []struct {
|
|
p2, p4 int
|
|
digit string
|
|
}{
|
|
{45, 70, "0"}, // wide at both, slightly wider at bottom
|
|
{20, 20, "1"}, // narrow at both
|
|
{55, 75, "2"}, // medium top, wide bottom bar
|
|
{55, 50, "3"}, // medium at both, right-heavy
|
|
{55, 25, "4"}, // wide top (hole), narrow bottom (stem)
|
|
{55, 45, "5"}, // medium top, medium-right bottom
|
|
{30, 65, "6"}, // narrow top (stem), wide bottom (loop)
|
|
{55, 25, "7"}, // wide top (bar), narrow bottom (stem) - same as 4
|
|
{45, 45, "8"}, // medium at both (holes)
|
|
{45, 50, "9"}, // medium top (loop), medium bottom (stem+bottom)
|
|
}
|
|
|
|
// Find best match using Manhattan distance
|
|
bestDist := 1000
|
|
bestDigit := "?"
|
|
for _, pat := range patterns {
|
|
dist := abs(p2-pat.p2) + abs(p4-pat.p4)
|
|
if dist < bestDist {
|
|
bestDist = dist
|
|
bestDigit = pat.digit
|
|
}
|
|
}
|
|
|
|
return bestDigit
|
|
}
|
|
|
|
// classifyDigitByFeatures classifies a single digit using hole and stroke analysis
|
|
// Returns the digit as a string ("0"-"9") or "" if uncertain
|
|
func classifyDigitByFeatures(img gocv.Mat) string {
|
|
rows, cols := img.Rows(), img.Cols()
|
|
if rows < 5 || cols < 3 {
|
|
return ""
|
|
}
|
|
|
|
holes := findHoles(img)
|
|
imgArea := rows * cols
|
|
|
|
// Debug output
|
|
fmt.Printf(" [Digit %dx%d] holes=%d", cols, rows, len(holes))
|
|
for i, h := range holes {
|
|
fmt.Printf(" h%d(y=%d,area=%d)", i, h.CenterY, h.Area)
|
|
}
|
|
|
|
// === HOLE-BASED CLASSIFICATION ===
|
|
if len(holes) == 2 {
|
|
fmt.Println(" -> 8 (2 holes)")
|
|
return "8"
|
|
}
|
|
|
|
if len(holes) == 1 {
|
|
hole := holes[0]
|
|
midY := rows / 2
|
|
|
|
// Hole in top half → 9
|
|
if hole.CenterY < midY {
|
|
fmt.Printf(" -> 9 (hole in top, centerY=%d < midY=%d)\n", hole.CenterY, midY)
|
|
return "9"
|
|
}
|
|
// Hole in bottom half → 6
|
|
if hole.CenterY > midY {
|
|
fmt.Printf(" -> 6 (hole in bottom, centerY=%d > midY=%d)\n", hole.CenterY, midY)
|
|
return "6"
|
|
}
|
|
// Hole exactly at middle - check size
|
|
// Large hole (>10% of image area) → 0
|
|
// Small hole → 4
|
|
if hole.Area > imgArea*10/100 {
|
|
fmt.Printf(" -> 0 (large hole at middle, area=%d > %d)\n", hole.Area, imgArea*10/100)
|
|
return "0"
|
|
}
|
|
fmt.Printf(" -> 4 (small hole at middle, area=%d <= %d)\n", hole.Area, imgArea*10/100)
|
|
return "4"
|
|
}
|
|
fmt.Print(" no-hole:")
|
|
|
|
// === NO-HOLE CLASSIFICATION ===
|
|
// First find the bounding box of white pixels
|
|
minX, maxX := cols, 0
|
|
minY, maxY := rows, 0
|
|
for y := 0; y < rows; y++ {
|
|
for x := 0; x < cols; x++ {
|
|
if img.GetUCharAt(y, x) > 0 {
|
|
if x < minX { minX = x }
|
|
if x > maxX { maxX = x }
|
|
if y < minY { minY = y }
|
|
if y > maxY { maxY = y }
|
|
}
|
|
}
|
|
}
|
|
digitWidth := maxX - minX + 1
|
|
digitHeight := maxY - minY + 1
|
|
|
|
// Check for narrow vertical (1) FIRST - before other checks
|
|
// "1" is tall and narrow (height/width ratio > 2)
|
|
if digitWidth > 0 && digitHeight > digitWidth*2 {
|
|
fmt.Printf(" narrow ratio h/w=%d/%d=%.1f -> 1\n", digitHeight, digitWidth, float64(digitHeight)/float64(digitWidth))
|
|
return "1"
|
|
}
|
|
|
|
// Check for bar across top (5 or 7)
|
|
topRegion := rows / 5
|
|
topMinX, topMaxX := cols, 0
|
|
for y := 0; y < topRegion; y++ {
|
|
for x := 0; x < cols; x++ {
|
|
if img.GetUCharAt(y, x) > 0 {
|
|
if x < topMinX { topMinX = x }
|
|
if x > topMaxX { topMaxX = x }
|
|
}
|
|
}
|
|
}
|
|
topSpan := topMaxX - topMinX
|
|
hasTopBar := topSpan > cols*50/100 // spans more than 50% of width
|
|
|
|
if hasTopBar {
|
|
// Distinguish 5 vs 7: check bottom-left quadrant
|
|
bottomLeftPixels := 0
|
|
for y := rows * 2 / 3; y < rows; y++ {
|
|
for x := 0; x < cols/2; x++ {
|
|
if img.GetUCharAt(y, x) > 0 {
|
|
bottomLeftPixels++
|
|
}
|
|
}
|
|
}
|
|
// 5 has content in bottom-left, 7 doesn't
|
|
if bottomLeftPixels > rows*cols/50 {
|
|
fmt.Printf(" topBar+bottomLeft=%d -> 5\n", bottomLeftPixels)
|
|
return "5"
|
|
}
|
|
fmt.Printf(" topBar, no bottomLeft=%d -> 7\n", bottomLeftPixels)
|
|
return "7"
|
|
}
|
|
|
|
// Check for horizontal bar in center (3)
|
|
// 3 has content spanning the middle horizontally
|
|
midY := rows / 2
|
|
centerRegion := rows / 6
|
|
centerMinX, centerMaxX := cols, 0
|
|
for y := midY - centerRegion; y < midY+centerRegion; y++ {
|
|
for x := 0; x < cols; x++ {
|
|
if img.GetUCharAt(y, x) > 0 {
|
|
if x < centerMinX { centerMinX = x }
|
|
if x > centerMaxX { centerMaxX = x }
|
|
}
|
|
}
|
|
}
|
|
centerSpan := centerMaxX - centerMinX
|
|
// Also check that 3 has right-side content in center
|
|
rightCenterPixels := 0
|
|
for y := midY - centerRegion; y < midY+centerRegion; y++ {
|
|
for x := cols * 2 / 3; x < cols; x++ {
|
|
if img.GetUCharAt(y, x) > 0 {
|
|
rightCenterPixels++
|
|
}
|
|
}
|
|
}
|
|
if centerSpan > cols*40/100 && rightCenterPixels > rows*cols/100 {
|
|
fmt.Printf(" centerSpan=%d, rightCenter=%d -> 3\n", centerSpan, rightCenterPixels)
|
|
return "3"
|
|
}
|
|
|
|
// Leftover → 2
|
|
fmt.Printf(" width=%d, centerSpan=%d -> 2 (leftover)\n", digitWidth, centerSpan)
|
|
return "2"
|
|
}
|
|
|
|
// findHolePositions finds enclosed regions (holes) and returns their X positions as percentages (0-9)
|
|
// Uses connected components on inverted image, excluding border-touching regions
|
|
func findHolePositions(img gocv.Mat) []int {
|
|
rows, cols := img.Rows(), img.Cols()
|
|
if rows < 3 || cols < 3 {
|
|
return nil
|
|
}
|
|
|
|
// Invert image (holes become white)
|
|
inverted := gocv.NewMat()
|
|
gocv.BitwiseNot(img, &inverted)
|
|
defer inverted.Close()
|
|
|
|
// Find connected components
|
|
labels := gocv.NewMat()
|
|
numLabels := gocv.ConnectedComponents(inverted, &labels)
|
|
defer labels.Close()
|
|
|
|
if numLabels <= 1 {
|
|
return nil
|
|
}
|
|
|
|
// Check which labels touch the border (those are background, not holes)
|
|
touchesBorder := make([]bool, numLabels)
|
|
for x := 0; x < cols; x++ {
|
|
touchesBorder[int(labels.GetIntAt(0, x))] = true
|
|
touchesBorder[int(labels.GetIntAt(rows-1, x))] = true
|
|
}
|
|
for y := 0; y < rows; y++ {
|
|
touchesBorder[int(labels.GetIntAt(y, 0))] = true
|
|
touchesBorder[int(labels.GetIntAt(y, cols-1))] = true
|
|
}
|
|
|
|
// Find center X of each hole and convert to percentage (0-9)
|
|
var positions []int
|
|
for label := 1; label < numLabels; label++ {
|
|
if touchesBorder[label] {
|
|
continue
|
|
}
|
|
// Find bounding box of this hole
|
|
minX, maxX := cols, 0
|
|
for y := 0; y < rows; y++ {
|
|
for x := 0; x < cols; x++ {
|
|
if int(labels.GetIntAt(y, x)) == label {
|
|
if x < minX { minX = x }
|
|
if x > maxX { maxX = x }
|
|
}
|
|
}
|
|
}
|
|
// Center X as percentage (0-9)
|
|
centerX := (minX + maxX) / 2
|
|
pct := centerX * 10 / cols
|
|
if pct > 9 {
|
|
pct = 9
|
|
}
|
|
positions = append(positions, pct)
|
|
}
|
|
|
|
// Sort positions
|
|
for i := 0; i < len(positions)-1; i++ {
|
|
for j := i + 1; j < len(positions); j++ {
|
|
if positions[j] < positions[i] {
|
|
positions[i], positions[j] = positions[j], positions[i]
|
|
}
|
|
}
|
|
}
|
|
|
|
return positions
|
|
}
|
|
|
|
// extractSkeletonPatterns extracts patterns using spatial division approach:
|
|
// 1. Crop to digit bounds (exclude noise not reaching top)
|
|
// 2. Check for leading "1" (narrow stroke on left)
|
|
// 3. Split remaining area in half for the two main digits
|
|
// 4. Analyze each with top/bottom half shape method
|
|
// 5. Count holes and append to fingerprint
|
|
func extractSkeletonPatterns(img gocv.Mat) []string {
|
|
// First find digit bounds (components reaching top), then crop
|
|
bounds := getDigitBounds(img)
|
|
croppedRegion := img.Region(bounds)
|
|
cropped := gocv.NewMat()
|
|
croppedRegion.CopyTo(&cropped)
|
|
croppedRegion.Close()
|
|
defer cropped.Close()
|
|
|
|
// Find hole positions before skeletonization (on the filled shapes)
|
|
holePositions := findHolePositions(cropped)
|
|
|
|
// Now skeletonize the cropped digit area
|
|
skeleton := zhangSuenThinning(cropped)
|
|
defer skeleton.Close()
|
|
|
|
rows, cols := skeleton.Rows(), skeleton.Cols()
|
|
var patterns []string
|
|
|
|
// Check for leading "1": look at leftmost 12% of image
|
|
// A "1" must: have vertical content AND have a gap after it
|
|
leadingWidth := cols * 12 / 100
|
|
gapStart := leadingWidth
|
|
gapEnd := cols * 20 / 100
|
|
hasLeadingOne := false
|
|
|
|
// Count pixels in leading region
|
|
leadingPixels := 0
|
|
for y := 0; y < rows; y++ {
|
|
for x := 0; x < leadingWidth; x++ {
|
|
if skeleton.GetUCharAt(y, x) > 0 {
|
|
leadingPixels++
|
|
}
|
|
}
|
|
}
|
|
|
|
// Count pixels in gap region (should be empty for a true "1")
|
|
gapPixels := 0
|
|
for y := 0; y < rows; y++ {
|
|
for x := gapStart; x < gapEnd; x++ {
|
|
if skeleton.GetUCharAt(y, x) > 0 {
|
|
gapPixels++
|
|
}
|
|
}
|
|
}
|
|
|
|
// If there are pixels in leading region AND gap is mostly empty
|
|
if leadingPixels > rows/2 && gapPixels < rows/4 {
|
|
// Check if pixels are vertically aligned (narrow X spread)
|
|
minX, maxX := cols, 0
|
|
for y := 0; y < rows; y++ {
|
|
for x := 0; x < leadingWidth; x++ {
|
|
if skeleton.GetUCharAt(y, x) > 0 {
|
|
if x < minX { minX = x }
|
|
if x > maxX { maxX = x }
|
|
}
|
|
}
|
|
}
|
|
if maxX-minX < leadingWidth {
|
|
hasLeadingOne = true
|
|
patterns = append(patterns, "1")
|
|
}
|
|
}
|
|
|
|
// Determine where the two main digits start
|
|
startX := 0
|
|
if hasLeadingOne {
|
|
startX = leadingWidth
|
|
}
|
|
|
|
// Split remaining area in half for two digits
|
|
remainingWidth := cols - startX
|
|
midX := startX + remainingWidth/2
|
|
|
|
// Extract left digit region
|
|
leftDigit := skeleton.Region(image.Rect(startX, 0, midX, rows))
|
|
leftPattern := classifyDigit(leftDigit)
|
|
leftDigit.Close()
|
|
if leftPattern != "" {
|
|
patterns = append(patterns, leftPattern)
|
|
}
|
|
|
|
// Extract right digit region
|
|
rightDigit := skeleton.Region(image.Rect(midX, 0, cols, rows))
|
|
rightPattern := classifyDigit(rightDigit)
|
|
rightDigit.Close()
|
|
if rightPattern != "" {
|
|
patterns = append(patterns, rightPattern)
|
|
}
|
|
|
|
// Append hole positions (e.g., "h27" means holes at 20% and 70% of width)
|
|
if len(holePositions) > 0 && len(holePositions) <= 4 {
|
|
h := "h"
|
|
for _, p := range holePositions {
|
|
h += fmt.Sprintf("%d", p)
|
|
}
|
|
patterns = append(patterns, h)
|
|
}
|
|
|
|
return patterns
|
|
}
|
|
|