commit 9bd05496f2e4584433167c8d92f633df97ca44e6 Author: Johan Date: Sun Feb 1 02:00:23 2026 -0500 Initial commit diff --git a/.DS_Store b/.DS_Store new file mode 100755 index 0000000..b50dfa5 Binary files /dev/null and b/.DS_Store differ diff --git a/._.DS_Store b/._.DS_Store new file mode 100755 index 0000000..28c42fb Binary files /dev/null and b/._.DS_Store differ diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 0000000..590a1b9 --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,9 @@ +{ + "permissions": { + "allow": [ + "Bash(./build.sh)" + ], + "deny": [], + "ask": [] + } +} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0c64ecb --- /dev/null +++ b/.gitignore @@ -0,0 +1,31 @@ +# Binaries +*.exe +*.exe~ +*.dll +*.so +*.dylib +/bin/ +/dist/ + +# Test binary +*.test + +# Output +*.out + +# Dependency directories +vendor/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# Env files +.env +.env.* + +# Databases +*.db +*.sqlite diff --git a/PROJECT_STATE.md b/PROJECT_STATE.md new file mode 100644 index 0000000..d906a29 --- /dev/null +++ b/PROJECT_STATE.md @@ -0,0 +1,1672 @@ +# PulseOx Monitor - Project State & Technical Documentation + +**Current Version:** v3.68 (Fixed Double Transform Bug) +**Last Updated:** November 30, 2025 +**Status:** Production - unified detection, whole-number OCR, Signal notifications + +āš ļø **IMPORTANT:** Update VERSION constant in pulseox-monitor.go and this document with every build! + +**For AI Assistant:** You MUST increment the version number in both pulseox-monitor.go and this document whenever you make ANY code changes. This is not optional - it's a critical part of the workflow. 
+ +--- + +## šŸ”§ For New Chat Sessions: Data Access + +**You have full filesystem access to `/Users/johanjongsma/pulse-monitor/`** + +Use these tools to work with the project: +- `Filesystem:read_file` - Read source code, logs, config files +- `Filesystem:write_file` - Create new files +- `Filesystem:edit_file` - Modify existing code (preferred for edits) +- `Filesystem:list_directory` - See what files exist +- `Filesystem:move_file` - Move/rename files (e.g., approving training digits) +- `Filesystem:search_files` - Find files by pattern + +**Key files you'll need:** +- Source: `pulseox-monitor.go`, `ocr.go`, `layout_detection.go`, etc. +- Config: `config.yaml` +- Training: `training_digits/*.png`, `training_digits/invalid/*.png` +- Output: `review/*.png`, `review/review.html` +- Logs: `pulseox-monitor_*.log` + +**DO NOT waste time asking how to access files - just use the filesystem tools directly!** + +--- + +## Project Overview + +This system monitors a Masimo Rad-G pulse oximeter by reading its display via RTSP camera feed. It uses computer vision (OpenCV/gocv) to perform OCR on the SpO2 and heart rate values, then posts readings to Home Assistant for tracking and alerting. + +### Why This Exists +The Masimo pulse oximeter doesn't have a data output port, so we use a camera pointed at its display. The display has significant overexposure and light leakage issues, making traditional OCR (Tesseract) unreliable. We developed a custom template-matching approach that handles these challenging conditions. + +--- + +## Architecture + +### Main Components + +1. **pulseox-monitor.go** - Main application (v3.10) + - Unified frame processing loop + - Interruptible sleep for graceful shutdown + - Smart escalation strategy + - Signal handling (Ctrl+C) + +2. **processor.go** - Frame processor + - OCR execution + - Result validation + - Home Assistant posting + +3. 
**validators.go** - Validation handlers + - Corruption detection + - Confidence checking + - Stability validation (hindsight) + +4. **ocr.go** - Recognition engine + - Template matching for digits 0-9 + - Invalid pattern detection (corruption) + - `hasOneAt()` function for detecting narrow '1' digits + - Dynamic width calculation for 2-digit vs 3-digit displays + +5. **layout_detection.go** - Display locator + - Finds SpO2 and HR display areas in frame + - Position-based detection (rightmost edge method) + - Handles digit fragmentation + +6. **normalize.go** - Frame normalization + - Screen width detection + - Scaling to 860px width + - Layout detection with normalization + +7. **frame_source.go** - Frame sources + - RTSP streaming source + - Single-file test source + - Interruptible via `IsActive()` method + +8. **types.go** - Shared data structures + - ProcessingState with ConsecutiveFailures counter + - Reading, ProcessingResult types + - Status and Action enums + +9. **homeassistant.go** - Data posting + - REST API integration + - Posts only when values change + - Separate sensors: `sensor.pulse_ox_spo2`, `sensor.pulse_ox_hr` + +10. **html_report.go** - Review interface + - Dark-themed HTML generated on shutdown + - Shows all processed frames with confidence scores + - Visual indicators for unstable readings + +11. 
**timestamp_ocr.go** - Timestamp validation + - Extracts camera timestamp from frame + - Uses Tesseract OCR + - Validates against server time + - Warns if lag >10 seconds + +### Data Flow + +``` +RTSP Stream / File → Frame Capture (every 500ms) → +Timestamp validation (every 10th frame) on **COLORED** frame → +**Clone frame for error saving** (grayscale → threshold at 180 → save as rawThresholded) → +Preprocessing (crop 68px top, rotate 90° CW) on **COLORED** frame → +**THRESHOLD ONCE at 180** (grayscale → binary) AFTER preprocessing → +Unified Detection (baseline rotation + scale + display areas) → +Apply transforms (rotate + scale to 860px) → +Display area extraction (280px wide regions) → +Whole-number template matching OCR → +Validation (physiological limits, confidence) → +Home Assistant posting +``` + +--- + +## Recognition System + +### Whole-Number Template Matching + +We use a library of whole-number images extracted from the actual pulse oximeter display. Instead of recognizing individual digits, the system matches entire display values (50-130) against templates. + +**Template organization:** +- Numbers 50-130: Each in own directory `training_digits/NN/` +- Multiple variants per number (1.png, 2.png, etc.) 
+- Templates shared between SpO2 and HR displays + +**Template location:** `/Users/johanjongsma/pulse-monitor/training_digits/` + +**Web-based approval workflow:** +- HTTP server on port 8085 during runtime +- review.html shows crops from each frame +- Type number → Enter → moves to appropriate template directory +- Auto-incrementing variant numbers + +### Binary Threshold + +The system converts grayscale frames to binary using a configurable threshold: + +- **BINARY_THRESHOLD constant**: Currently 180 (was 240) +- Lower values capture fainter displays (e.g., when using sponge mount) +- Applied consistently across all processing paths +- Pre-threshold grayscale saved to `test_output/pre_threshold_gray.png` for tuning + +### Unified Detection System + +- **Baseline-based rotation**: Detects rotation from graph baseline endpoints +- **Scale normalization**: Scales to 860px baseline width for consistent coordinates +- **Display area detection**: Finds SpO2 and HR rectangles in scaled/rotated space +- **Frame rate**: 2fps (500ms intervals) +- **Internal retry loop**: Up to 6 frames attempted before escalating to wait periods +- **Detection recovery**: 3 fast retries after each wait period expires + +--- + +## Exception Handling System + +The system has three independent exception handlers that run in sequence: + +### 1. Confirmed Corruption (Priority: Highest) +**Trigger:** Any digit recognized as `-1` (matched invalid template with >70% confidence) + +**Behavior:** +- Log to file (silent to console) +- Skip corrupted frame +- Continue to next frame through normal processing +- No special retry, no layout re-detection + +**Why this approach:** +Corruption is a recognition failure, not a camera/lighting issue. The next frame will naturally be clean. + +**Invalid templates:** +Stored in `training_digits/invalid/` - patterns like white blobs, UI elements, or corrupted digits that should never be interpreted as valid numbers. + +### 2. 
Low OCR Confidence +**Trigger:** Average confidence < 85% for SpO2 or HR, OR negative values (unrecognized digits) + +**Behavior:** +1. **First occurrence:** + - Log "āš ļø Low confidence, retrying with next frame..." or "[UNRECOGNIZED] SpO2=X, HR=Y" + - Read next frame immediately + - Re-detect layout (handles camera movement/vibration) + - Process retry frame + - If successful (high confidence + values changed): perform stability check and post + +2. **Second consecutive low confidence:** + - Log "āš ļø Low confidence after retry, pausing 2 seconds" + - Give up, wait 2 seconds before continuing + +**Why re-detect layout:** +Low confidence or unrecognized digits often indicate camera movement, lighting changes, focus issues, or temporary blur. Re-detecting layout helps recover from these transient problems. + +### 3. Large Delta (Hindsight Validation) +**Trigger:** Values changed >3 points from last stable reading (e.g., SpO2: 95→92 or HR: 70→75) + +**Behavior:** +1. Hold new reading as "pending" (don't post yet) +2. Store baseline (value before the spike) +3. Wait for next reading +4. **If next reading confirms trend** (moves in same direction): + - Post the held reading + - Post the confirming reading +5. **If next reading contradicts** (moves opposite direction or returns to baseline): + - Discard held reading as glitch + - Post the new reading + +**Why hindsight validation:** +Physiological changes are gradual. Sudden spikes >3 points are usually recognition glitches or transient display artifacts. This prevents false alarms while allowing real trends. + +**Example scenario:** +``` +Baseline: SpO2=95 +Reading 1: SpO2=91 (Ī”4) → HOLD, don't post +Reading 2: SpO2=89 (Ī”2 from held) → Trend confirmed, post 91 then 89 +``` + +vs. + +``` +Baseline: SpO2=95 +Reading 1: SpO2=91 (Ī”4) → HOLD, don't post +Reading 2: SpO2=95 (back to baseline) → Discard 91 as glitch, post 95 +``` + +--- + +## Key Design Decisions + +### 1. 
Change-Based Posting Only +**Decision:** Only post when SpO2 or HR values actually change. + +**Rationale:** +- Reduces Home Assistant database bloat +- Avoids unnecessary network traffic +- Still captures all meaningful changes + +### 2. Template Matching Over Tesseract OCR +**Decision:** Use custom template matching instead of Tesseract. + +**Rationale:** +- Pulse oximeter display has severe overexposure +- Tesseract unreliable with bright backgrounds +- Template matching: 90%+ accuracy vs. Tesseract: ~60% +- We have stable digit shapes - perfect for template matching + +### 3. Fixed Layout After Startup +**Decision:** Detect layout once, lock it, only re-detect on low confidence retry. + +**Rationale:** +- Camera is stationary (mounted on tripod) +- Display position doesn't change +- Layout detection is expensive (~10-20ms) +- Re-detection only needed if camera moves/vibrates + +### 4. No Immediate Corruption Retry +**Decision:** Don't immediately retry after detecting corruption; just skip to next frame. + +**Rationale:** +- Corruption is a recognition issue, not camera issue +- Next frame (133ms later at 15fps) will naturally be different +- Avoids complexity of retry-within-retry scenarios +- Simpler code, same reliability + +### 5. Processing Every 4th Frame +**Decision:** Process every 4th frame (~3.75 fps from 15fps stream). + +**Rationale:** +- Balance between responsiveness and CPU usage +- SpO2/HR don't change faster than ~1 reading/second physiologically +- Allows time for processing, posting, and stability checks +- Adjusted for 15fps camera (was 6th frame for 30fps) +- Can be adjusted: 3rd frame = ~5fps, 5th frame = ~3fps + +### 6. Dark Theme HTML Review +**Decision:** Dark background with light text in review HTML. + +**Rationale:** +- Easier on eyes during long review sessions +- Better contrast for confidence scores +- Color-coded indicators more visible + +### 7. No FPS Display +**Decision:** Don't display stream FPS statistics. 
+ +**Rationale:** +- Stream FPS is constant (30fps from camera) +- FPS logging adds noise to console output +- Processing rate (every 6th frame) is more relevant than raw stream FPS +- Keeps console focused on actual readings and warnings + +### 8. Digit Validation for '8' and '0' +**Decision:** Validate recognized '8' and '0' digits by checking for characteristic holes. + +**Rationale:** +- Template matching can falsely recognize corrupted/partial digits as '8' or '0' +- '8' validation: checks for TWO holes (at 30% and 70% Y, scanning 40-50% X) +- '0' validation: checks for ONE hole (at 50% Y, scanning 30-70% X) +- Wider scan range for '0' (30-70%) than '8' (40-50%) because '0' hole is larger +- If validation fails, digit marked as -1 (invalid) → triggers retry/re-detection +- Prevents false alarms from misrecognized digits +- Simple pixel-based validation is fast and reliable + +### 9. Alarm Mode Decolorization +**Decision:** Pre-process colored alarm backgrounds before OCR to normalize them to black/white. + +**Rationale:** +- Pulse oximeter displays colored backgrounds when in alarm mode (low SpO2 or abnormal HR) +- SpO2 alarm: Yellow/orange background, HR alarm: Cyan/blue background +- Normal thresholding fails with colored backgrounds +- Decolorization converts: white digits (RGB all > 240) → white (255), everything else → black (0) +- **Applied ONLY to display regions AFTER extraction** (in `recognizeDisplayArea()` function) +- **Layout detection works on original colored frames** - grayscale conversion handles colors fine for contour detection +- **Decolorization timing**: + 1. Layout detection: Original colored frame → grayscale → threshold → find contours (works fine) + 2. Extract display regions: Still colored (280x200px = 56K pixels) + 3. Decolorize extracted regions: Only ~100K pixels total (both displays) + 4. 
Continue with OCR: grayscale → threshold → template matching +- Each display region is ~280x200px = 56K pixels +- Decolorizing two display regions (~100K pixels) takes <10ms +- Threshold of 240 ensures only pure white pixels (digits/borders) are preserved, not grays +- No performance impact - simple pixel-wise operation on small regions +- Enables reliable digit recognition regardless of alarm state +- Critical for 24/7 monitoring where alarms are expected + +--- + +## Environmental Considerations + +### Light Leakage Issues +**Problem:** Ambient light creates false contours and affects brightness detection. + +**Solutions implemented:** +- Increased brightness threshold to 170 (from lower values) +- More selective contour filtering +- Height filters to exclude oversized boxes (<200px) + +**Manual mitigation:** +Position towel/cloth to block light from reflecting off display surface. + +### Camera Positioning +**Critical:** Camera must be positioned to minimize glare and ensure consistent framing. 
+ +**Current setup:** +- Tripod-mounted camera +- RTSP stream from network camera +- Fixed zoom/focus (no autofocus) + +--- + +## Files and Directories + +### Source Files +- `pulseox-monitor.go` - Main application +- `processor.go` - Frame processor +- `validators.go` - Validation handlers +- `ocr.go` - Recognition engine +- `layout_detection.go` - Display finder +- `normalize.go` - Frame normalization +- `frame_source.go` - Frame sources (RTSP/file) +- `types.go` - Shared data structures +- `helpers.go` - Utility functions (logf) +- `homeassistant.go` - API integration +- `html_report.go` - Review page generator +- `timestamp_ocr.go` - Timestamp validation +- `config.go` - Configuration loading + +### Configuration +- `config.yaml` - Camera URL and Home Assistant settings + +### Training Data +- `training_digits/*.png` - Valid digit templates (0-9) +- `training_digits/invalid/*.png` - Corruption patterns + +### Output +- `review/` - Frame captures and digit images (cleaned each run) + - `f{N}_boxes.jpg` - Visualization of detected areas + - `f{N}_{display}_checks.png` - Digit detection analysis + - `f{N}_{display}_digit[1-3].png` - Individual digit crops + - `f{N}_{display}_full.png` - Full display with recognized value + - `review.html` - Live-updated HTML (append-only) +- `raw_frames/` - Raw processed frames for failed recognitions (cleaned each run) + - `raw_YYYYMMDD-NNNNN.png` - **TRUE raw frames** (unprocessed, portrait orientation, with timestamp) + - Only saved when recognition fails (corruption, low confidence, unrecognized) + - Used for debugging and training data collection + - **Can be tested directly**: `./pulseox-monitor raw_frames/raw_*.png` (will go through full preprocessing pipeline) +- `test_output/` - Debug visualizations (cleaned each run) + - Layout detection debug files + - Corruption debug snapshots with timestamps + +### Logs +- `pulse-monitor_YYYYMMDD_HHMMSS.log` - Detailed execution log + - Console shows only: value changes, warnings, 
errors + - File logs: all frames, timing, confidence scores, decisions + +--- + +## Development Workflow + +### Version Management + +**CRITICAL: Update version with every build!** + +**For AI Assistant (Claude):** +- ALWAYS increment version number when making ANY code changes +- Update BOTH pulseox-monitor.go and PROJECT_STATE.md +- Add entry to Version History section documenting what changed +- This is MANDATORY, not optional + +1. **Version increment process:** + - Open `pulseox-monitor.go` + - Update `const VERSION = "v3.XX"` (increment version number) + - Document what changed in version history section below + +2. **After significant changes:** + - Update this PROJECT_STATE.md document + - Keep architecture, design decisions, and troubleshooting current + - Update "Last Verified Working" date at bottom + +3. **Commit discipline:** + - Version bump = code change + - Every feature/fix gets a version increment + - Keep PROJECT_STATE.md in sync with code + +### Adding New Training Digits + +When you find a good digit image in the review files: + +```bash +# Example: Approve f768_hr_digit3.png as digit "1" +# Creates training_digits/1_2.png (next available variant) +``` + +Current naming: `{digit}_{variant}.png` (e.g., `7_1.png`, `7_2.png`) + +### Adding Invalid Patterns + +When you find corrupted/glitchy images: + +```bash +# Move to invalid folder with descriptive name +# Creates training_digits/invalid/invalid_X.png +``` + +### Testing Changes + +1. Edit code on Mac +2. Compile: `./build.sh` +3. Run: `./pulseox-monitor` +4. Monitor console for value changes and warnings +5. Check log file for detailed diagnostics +6. 
Review HTML on shutdown (Ctrl+C) for visual verification + +### Performance Tuning + +**Frame rate:** +- Change `skipCount` parameter in NewRTSPSource call (currently 4) +- Located in pulseox-monitor.go +- Values: 3=~5fps, 4=~3.75fps, 5=~3fps (for 15fps stream) + +**Confidence threshold:** +- Currently 85% for high confidence +- Located in multiple places in main loop +- Lower = more false positives, Higher = more retries + +**Delta threshold:** +- Currently 3 points for stability check +- Change in large delta handler +- Lower = stricter (more held readings), Higher = more lenient + +--- + +## Known Limitations + +1. **Camera-dependent:** Requires good camera positioning and lighting +2. **Display-specific:** Templates are specific to Masimo Rad-G display fonts +3. **No real-time HTML:** Review HTML only generated on shutdown +4. **Three-digit limitation:** Assumes HR displays as 1XX when >99, may need adjustment for other scenarios +5. **Brightness sensitivity:** Very bright ambient light can still cause issues + +--- + +## Future Enhancements (Planned) + +### Docker Deployment +**Target:** Ubuntu 22.04 server with Docker Compose + +**Requirements:** +- Dockerfile with Go + OpenCV +- Mount volumes: config.yaml, training_digits/, review/, logs/ +- Network access to RTSP stream and Home Assistant +- docker-compose.yml integration with existing HA setup + +**Considerations:** +- Image size: ~500MB-1GB with OpenCV +- Multi-stage build possible for smaller runtime image +- Review file access via scp or lightweight HTTP server + +### SQLite + Live HTML +**Goal:** Real-time progress viewing without stopping app + +**Approach:** +- Store frame data in SQLite as processed +- HTTP endpoint generates HTML on-demand from DB +- Access at `http://server:8080/review.html` anytime + +**Benefits:** +- No shutdown required to review progress +- Historical data queryable +- API potential for other tools + +### Adaptive Thresholds +**Idea:** Auto-adjust confidence and delta 
thresholds based on recent history + +**Example:** +- If 90% of last 100 frames >95% confidence → raise threshold to 90% +- If frequent false positives → increase delta threshold temporarily + +--- + +## Troubleshooting Guide + +### "No templates found for digit X" +**Problem:** Missing training data for a digit. + +**Solution:** Run system, review output, approve good images for that digit. + +### Frequent low confidence warnings +**Causes:** +- Camera moved/vibrated +- Lighting changed +- Focus drifted +- Display brightness changed + +**Solutions:** +- Check camera mounting +- Adjust lighting (block reflections) +- Re-focus camera if needed +- Restart to re-detect layout + +### False corruption detection +**Problem:** Valid digits matched as invalid. + +**Solution:** Review invalid templates, remove overly broad patterns. + +### Large delta causing unnecessary holds +**Problem:** Real physiological changes >3 points being held. + +**Solution:** +- Increase delta threshold (e.g., to 5) +- Or adjust hindsight validation logic +- Consider different thresholds for SpO2 vs HR + +### Values not posting to Home Assistant +**Checks:** +1. Home Assistant URL correct in config.yaml? +2. Network connectivity? +3. Sensors created in HA configuration? +4. Check log file for POST errors +5. Are values actually changing? 
(change-based posting only) + +--- + +## Performance Characteristics + +**Typical timing per frame (v3.48):** +- Frame acquisition: 150-250ms (camera waiting time) +- Preprocessing: 8-12ms +- Frame scaling: 3-5ms +- SpO2 recognition: 9-12ms +- HR recognition: 9-12ms +- Validation: 0-1ms +- File I/O: 0ms (normal), 40-50ms (failures only) +- Home Assistant POST: 5ms (when values change) +- **Total: 25-35ms per frame** (processing only, excluding camera wait) +- Timestamp check (every 10th frame): ~22ms (optimized with reused OCR client) + +**Debug mode (with /debug flag):** +- Additional file I/O: +40ms (saves all digit images) +- Total: 65-75ms per frame + +**With every 4th frame processing at 15fps:** +- ~3.75 readings per second maximum +- Actual posting rate: varies (only when values change) +- CPU usage: Low (single-threaded, mostly idle) +- Timestamp validation: Every ~2.5 seconds (~22ms each) + +--- + +## Version History + +### v3.68 (Current - Fixed Double Transform Bug) +- **CRITICAL FIX: Detection no longer modifies input frames**: Fixed bug where frames were being transformed twice + - **Problem**: Detection was rotating+scaling internally AND returning coordinates in transformed space + - **Then**: Main loop was rotating+scaling AGAIN before applying those coordinates + - **Result**: Display boxes extracted at completely wrong positions (showing partial digits) + - **Example**: SpO2 crop showing only "0" instead of full "98" display + - **Fix**: Detection now works on its own internal copy: + - Clones the input frame (doesn't modify original) + - Performs rotation+scaling on the clone for detection purposes + - Returns only rotation angle, scale factor, and box coordinates (in transformed space) + - Does NOT return the transformed frame + - Main loop applies SAME transforms to match coordinate space + - **Code changes**: + - `DetectRotationAndWidth()` clones input, works on copy, returns only DetectionResult + - `tryDetectionWithRetries()` no longer 
returns binary frame + - Main loop applies transforms once to match detection coordinates + - This fixes the low confidence OCR failures at the new camera distance + +### v3.67 (Enhanced Detection Debugging) +- **Added comprehensive visual debugging for baseline detection**: Two debug images show all detection steps + - **debug_baseline_detection.png**: Shows baseline detection on original frame + - Cyan rectangles: Two baseline sections with widths + - Green circle "L": Left rotation point (10px from left edge of left section) + - Red circle "R": Right rotation point (10px from right edge of right section) + - Yellow line: Baseline between rotation points (used to calculate rotation angle) + - Magenta lines: Graph band boundaries (tallest band containing waveform) + - Yellow text overlay: Baseline width, rotation angle, scale factor + - **debug_final_boxes.png**: Shows final display boxes on scaled/rotated frame + - Red rectangle: SpO2 display area (280px wide, anchored to right edge) + - Cyan rectangle: HR display area (280px wide, anchored to right edge) + - Green lines: Digit band boundaries (band after graph band) + - Yellow text: Final scaled image dimensions + - Helps diagnose why scale and rotation are jumping between consecutive frames + - Run with `./pulseox-monitor /debug` to generate these images + +### v3.66 (Configurable Threshold + Pre-threshold Debug) +- **Lowered binary threshold to 180**: Display brightness was fainter with sponge mount + - Added `BINARY_THRESHOLD` constant in pulseox-monitor.go for easy adjustment + - Applied consistently across all 4 threshold locations: + - Raw frame saving + - Main preprocessing + - Detection retry loop + - PreprocessFrame in detection.go + - Previous: 240 (too high for fainter displays) + - New: 180 (captures fainter baseline and digits) +- **Pre-threshold grayscale debug image**: Saves first frame's grayscale before thresholding + - Saved to `test_output/pre_threshold_gray.png` + - Allows inspection of 
actual brightness values to tune threshold + - Only saves on first processed frame + +### v3.65 (Detection Internal Retry Loop) +- **Detection now retries internally with fresh frames**: Much faster recovery after unplugging + - New `tryDetectionWithRetries()` function fetches up to 5 additional frames if initial detection fails + - Each frame goes through preprocess → threshold → detection + - If any frame succeeds, returns that frame for continued processing + - Logs "Detection succeeded on retry #X" when internal retry works + - External escalation (wait periods) only triggered after all internal retries fail + - Result: 6 attempts (~3 seconds) before any wait period kicks in + +### v3.64 (Signal Notification System) +- **Home Assistant Signal notifications**: Alerts sent via Signal Messenger + - Startup notification: "Monitoring started" when app connects + - Detection failure: "Detection failing for 5+ minutes" after prolonged failure + - Missing template warning: "Big value jump detected - template X might be missing!" 
+- **Flood protection mechanism**: Prevents notification spam + - Startup/detection failure: Once per hour maximum (marker files with timestamps) + - Missing templates: Once ever per template number (persistent list) + - Marker files: `.startup_notified`, `.detection_fail_notified`, `.missing_template_notified` +- **Detection retry improvements**: Better recovery after wait periods + - After wait expires, reset DetectFailStage to 4 (gives 3 fast retries before next wait) + - Set ConsecutiveFailures = 0 during detection failures (prevents NextImmediate spam) + +### v3.63 (Physiological Limit Adjustment) +- **Minimum valid value raised to 45**: Values below 45 rejected and not posted to HASS + - Previous: 40 + - Rationale: HR/SpO2 below 45 are extremely unlikely in living patients + - Helps filter out misrecognition of partial digits + +### v3.62 (Low Confidence Backoff Strategy) +- **Escalating backoff for low confidence**: Progressive wait strategy + - Stage 1: Fresh frame immediately + - Stage 2: Reset detection, try immediately + - Stage 3: Wait 10s + reset detection + - Stage 4: Wait 30s + reset detection + - Stage 5+: Wait 60s + reset detection +- **Detection failure backoff**: Similar escalation + - Stages 1-6: Immediate retry (~3 seconds at 2fps) + - Stage 7: Wait 10s + - Stage 8: Wait 30s + - Stage 9+: Wait 60s +- **Frame consumption during waits**: Keeps reading frames to prevent RTSP buffer buildup + +### v3.61 (Frame Rate + Duplicate Detection) +- **Increased frame rate to 2fps**: Changed from 2 seconds to 500ms between frames +- **Duplicate frame detection**: Skip OCR if frame matches previous exactly + - `imagesMatch()` function compares binary frames + - 100% pixel match required to use cached results + - Reduces CPU usage when display is static + +### v3.60 (Whole-Number OCR Architecture) +- **Complete rewrite of OCR system**: Match entire numbers instead of individual digits + - Range: 50-130 for both SpO2 and HR + - Templates shared between displays 
+ - `loadTemplates()` loads from training_digits/NN/*.png + - `matchNumber()` tries all templates for a number, returns best confidence + - `recognizeDisplayArea()` matches entire display area against all numbers +- **Web-based template approval**: HTTP server on port 8085 + - review.html interface with approval workflow + - Type number → Enter → moves crop to training_digits/NN/ + - Auto-incrementing variant numbers (1.png, 2.png, etc.) +- **Always saves crops**: Every frame saves clean crops to review/ for potential approval + +### v3.59 (Unified Detection) +- **Baseline-based rotation detection**: Uses graph baseline to determine rotation angle + - Finds two sections in baseline (before/after gap) + - Calculates rotation from baseline endpoints + - Scales to 860px baseline width +- **Fixed coordinate systems**: All display areas in scaled, rotated coordinates +- **Detection result struct**: Returns rotation, scale, SpO2/HR rectangles + +### v3.58 (Detection Improvements) +- **Rotation correction**: Applies detected rotation to correct camera tilt +- **Scale normalization**: Scales all frames to consistent 860px baseline width +- **Display area detection**: Finds SpO2 and HR display areas after transforms + +### v3.57 (True Raw Frame Saving) +- **CRITICAL FIX: Raw frames are now truly raw (before preprocessing)** + - **Problem in v3.56**: "Raw" frames were saved AFTER preprocessing (cropped, rotated, thresholded) + - **User confusion**: Testing with "raw" frames didn't work as expected - frames were already processed + - **Fix**: Clone and threshold raw frame IMMEDIATELY after acquisition, before ANY preprocessing + - **New flow**: + 1. Acquire colored frame from camera (portrait, with timestamp) + 2. **Clone frame → grayscale → threshold at 240 → save as rawThresholded** + 3. Continue with original colored frame → preprocess (crop + rotate) → threshold → OCR + 4. 
On error: Save rawThresholded to raw_frames/ + - **Result**: Saved "raw" frames are truly raw (portrait orientation, timestamp visible, not cropped/rotated) + - **File size**: ~100KB PNG (thresholded) vs 2MB colored JPG + - **Benefits**: + - Can test raw frames through full preprocessing pipeline + - See exactly what came from camera before any processing + - Small file size (thresholded) vs huge colored images + - **Impact**: When testing with raw_frames/raw_*.png, they'll go through crop+rotate+threshold like normal frames + +### v3.56 (Threshold AFTER Preprocessing) +- **CRITICAL FIX: Moved threshold to AFTER preprocessing**: Timestamp overlay was being thresholded! + - **Problem in v3.54-v3.55**: Threshold happened BEFORE preprocessing + - Colored frame → threshold → binary + - Timestamp extracted from binary frame (appeared as B&W) + - Preprocessed binary frame (cropped B&W timestamp) + - **Correct flow now**: + 1. Acquire colored frame + 2. Extract timestamp from **colored** frame (stays colored!) + 3. Preprocess colored frame (crop timestamp + rotate) + 4. **Threshold preprocessed frame** at 240 → binary + 5. Layout detection on binary + 6. 
OCR on binary + - **Why this is correct**: + - Timestamp area is removed BEFORE thresholding (stays colored) + - Only the actual display area (after crop/rotate) gets thresholded + - Single threshold at 240, happens once, at the right time + - **Impact**: Saved raw frames now show colored timestamp overlay (as expected) + +### v3.55 (Fixed Double Threshold Bug) +- **CRITICAL FIX: Removed duplicate threshold in saveThresholdedFrame()**: Frame was being thresholded TWICE + - **Problem**: saveThresholdedFrame() was doing grayscale conversion + threshold at 128 + - **But**: rawFrame parameter is ALREADY thresholded binary (from line 313 in pulseox-monitor.go) + - **Result**: Frames were being thresholded twice - once at 240, then again at 128 + - **Fix**: saveThresholdedFrame() now just saves the frame directly (no conversion, no threshold) + - **Impact**: Overlay/timestamp area should look consistent now + - This was the "extra" threshold causing B&W overlay to look different +- **Confirmed ONLY one threshold operation now**: + - pulseox-monitor.go line 292: Threshold at 240 (the ONLY threshold) + - layout_detection.go: No threshold (works on binary) + - ocr.go: No threshold in recognizeDisplayArea() (works on binary) + - processor.go: saveThresholdedFrame() just saves (no threshold) +- Note: ocr.go still has OLD deprecated recognizeDisplay() function with thresholds, but it's never called + +### v3.54 (Single Threshold Architecture - Had Double Threshold Bug) +- Attempted to threshold once at 240 after frame acquisition +- Removed internal thresholds from layout_detection.go and ocr.go +- BUT missed duplicate threshold in saveThresholdedFrame() → fixed in v3.55 + +### v3.53 (CPU Optimization - Thread Limiting) +- Removed duplicate ProcessedCount increment bug +- Limited OpenCV threads to 1 for minimal overhead + +### v3.52 (CPU Optimization Complete) +- **Cleaned up failed GPU acceleration attempts**: Removed AVFoundation backend code + - **Investigation findings**: 
GPU hardware acceleration for RTSP streams is not feasible with current OpenCV setup + - AVFoundation doesn't support network streams (RTSP), only local camera devices + - OpenCV's ffmpeg backend has VideoToolbox compiled in but doesn't auto-activate for RTSP + - Would require one of: (1) patching OpenCV's videoio plugin in C++, (2) complete rewrite with different library, or (3) local transcoding proxy + - **Final optimization achieved**: CPU reduced from 50% to ~30% (40% reduction) + - Single-threaded OpenCV (gocv.SetNumThreads(1)) eliminated thread overhead + - Homebrew's OpenCV has AVFoundation/VideoToolbox support but only for local devices + - ~15% CPU for RTSP H.264 decoding (software) + ~15% CPU for OCR processing + - **Conclusion**: 30% CPU is acceptable for 24/7 monitoring, fan noise significantly reduced + +### v3.51 (Attempted GPU Acceleration - Reverted) +- **Attempted AVFoundation backend**: Failed for RTSP streams (network streams not supported) + +### v3.50 (Reduced CPU Overhead) +- **Added OpenCV thread limiting**: Dramatically reduced CPU usage and thread count + - **Problem**: OpenCV was creating 40 threads (one per CPU core) for small operations + - **Impact**: Excessive context switching, 65%+ CPU usage for simple 280x200px image operations + - **Fix**: Limited OpenCV to 1 thread with `gocv.SetNumThreads(1)` at startup + - **Rationale**: + - Our images are tiny (280x200px per display) + - We process sequentially, not in parallel + - Processing time <20ms - faster than thread spawning overhead + - Single-threaded = zero thread management overhead + - **Expected result**: + - Thread count drops from ~40 to ~5-8 (only Go runtime threads) + - CPU usage drops from 65% to <20% + - No performance loss (processing still <20ms per frame) + - **Startup message**: "šŸ”§ OpenCV threads limited to 1 (single-threaded for minimal overhead)" + - **Testing**: v3.50 with 2 threads showed 23 threads/48% CPU, so moved to 1 thread for further optimization + +### 
v3.49 (Fixed Frame Counter Bug) +- **CRITICAL FIX: Removed duplicate ProcessedCount increment**: Fixed skipped frame numbers + - **Problem**: `state.ProcessedCount++` was being called twice: + 1. In main loop (pulseox-monitor.go) - for every frame acquired + 2. In processor.go - when values changed and HASS posted + - **Result**: Every time values changed, the counter was incremented twice + - **Symptom**: Frame numbers would skip (e.g., #61, #62, skip #63, #64...) + - **Pattern**: Always skipped the frame number 2 positions after a HASS post + - **Fix**: Removed the duplicate increment in processor.go + - **Now**: Frame numbers increment consistently: #61, #62, #63, #64... + +### v3.48 (Major Performance Improvements) +- **CRITICAL: Moved digit image saves to DEBUG_MODE only**: Massive performance improvement + - **Problem**: OCR was saving 8 PNG files per frame (4 per display Ɨ 2 displays) + - checks.png visualization + - digit1.png, digit2.png, digit3.png (individual digits) + - full.png (labeled composite) + - **Impact**: ~5ms per PNG write Ɨ 8 files = 40ms of unnecessary file I/O per frame + - **Fix**: Wrapped all digit image saves in `if DEBUG_MODE` checks + - **Result**: OCR now takes ~9-12ms total (just template matching, no file I/O) + - **When needed**: Run with `/debug` flag to generate all digit images +- **Only save boxes.jpg on failures**: Reduced file I/O for successful frames + - Previously: Saved layout visualization (boxes.jpg) every time values changed (~51ms) + - Now: Only saves boxes.jpg when OCR fails (corruption, unrecognized, low confidence) + - Successful frames: No extra file I/O beyond what's needed +- **Performance comparison**: + - Before: Total ~80ms (Prep 10ms + Scale 4ms + OCR 20ms + FileIO 40ms + Valid 1ms + HASS 5ms) + - After: Total ~30ms (Prep 10ms + Scale 4ms + OCR 10ms + FileIO 0ms + Valid 1ms + HASS 5ms) + - **~60% faster processing** for normal operation +- **Debug workflow unchanged**: `/debug` flag still generates all 
images as before + +### v3.47 (Timing Instrumentation) +- **Added comprehensive timing measurements with /timing flag**: Enables detailed performance analysis + - **New /timing command line flag**: Activates timing mode (./pulseox-monitor /timing) + - **Horizontal timing table**: Shows timing breakdown for each frame + - Frame | Acquire | Prep | Scale | OCR_SpO2 | OCR_HR | Valid | FileIO | HASS | Total + - **Header printed every 20 frames**: Prevents scroll-back issues + - **Timing measurements**: + - **Acquire**: Frame acquisition from RTSP source - WAITING TIME (not processing) + - **Prep**: Preprocessing (crop timestamp + rotate 90°) + - **Scale**: Frame scaling to normalized width (if needed) + - **OCR_SpO2**: SpO2 display recognition (template matching) + - **OCR_HR**: HR display recognition (template matching) + - **Valid**: Validation checks (corruption, confidence, stability) - accumulated across all checks + - **FileIO**: Saving review images and debug files - accumulated across all file operations + - **HASS**: Home Assistant POST request (only when values change) + - **Total**: Wall-clock processing time (Prep + Scale + OCR + Valid + FileIO + logging overhead) + - **Note**: Acquire is separate (camera waiting time). Total may not equal sum due to logging/overhead. 
+ - **Purpose**: Identify performance bottlenecks (e.g., excessive file I/O) + - **Implementation**: + - Added TimingData struct in types.go + - Added TIMING_MODE global flag + - Added printTimingTable() function in helpers.go + - Added timing measurements throughout processFrame() and processFrames() + - Timing data passed through entire processing pipeline + - **Example output**: + ``` + Frame | Acquire | Prep | Scale | OCR_SpO2 | OCR_HR | Valid | FileIO | HASS | Total + ------|---------|------|-------|----------|--------|-------|--------|------|------- + #123 | 2ms | 6ms | 0ms | 4ms | 4ms | 2ms | 38ms | 5ms | 61ms + #124 | 2ms | 6ms | 0ms | 4ms | 4ms | 1ms | 39ms | 5ms | 61ms + ``` + - **Benefits**: + - Quickly identify slow operations + - Track performance trends over time + - Validate optimization efforts + - Debug unexpected delays + +### v3.46 (Fixed "1" Detection + Raw Frame Improvements) +- **CRITICAL FIX: Lowered "1" digit detection threshold from 90% to 85%** + - **Problem**: "1" digits with 87-89% confidence were being rejected, causing misrecognition + - **Example**: "71" was being cut incorrectly because the "1" wasn't detected (87.1% < 90% threshold) + - **Result**: Wrong width used (100px instead of 72px), throwing off all cut positions + - **Fix**: Threshold lowered to 85% in `hasOneAt()` function (ocr.go:196) + - **Impact**: More reliable detection of "1" digits, especially with slight variations in brightness/focus +- **Auto-enable DEBUG_MODE for file processing** + - When testing with a file (e.g., `./pulseox-monitor raw_frames/thresh_*.png`), DEBUG_MODE automatically enabled + - Eliminates need to add `/debug` flag manually for single frame testing + - Implementation: pulseox-monitor.go:65-66 in `runSingleFrameMode()` +- **Added comprehensive debug output for digit detection** + - Shows display width at start of detection + - For each digit (3→2→1): + - Check position being tested + - hasOneAt() extraction region and confidence score + - 
Whether detected as "1" or not + - Width being used (72px vs 100px) + - Running total width + - Final CUT position calculated + - Region extraction coordinates and widths for all three digits + - Makes debugging cut position issues trivial - can see exact logic flow +- **Fixed raw frame saving to be truly raw** + - **Previous**: Frames were saved AFTER preprocessing (rotated, cropped) + - **Now**: Frames saved BEFORE any processing (portrait, with timestamp, untouched) + - **Processing applied**: Only thresholded (grayscale → binary) to save space as PNG + - **Benefit**: Can test raw frames with full preprocessing pipeline + - **Implementation**: Clone frame immediately after reading, pass to processFrame() +- **Fixed duplicate logging in test mode** + - **Problem**: Messages with `Both` target were printed twice in test mode + - **Cause**: globalLogger was set to os.Stdout (same as console output) + - **Fix**: Set globalLogger = nil in test mode (pulseox-monitor.go:71) + - **Result**: Clean, non-duplicated output when testing with files + +### v3.45 (Added Processing Time Measurements) +- **Added comprehensive timing measurements**: Now shows how long each frame takes to process + - **Frame processing time**: Displayed in console output: `SpO2=97%, HR=76 bpm [45ms]` + - **Frame acquisition interval**: Logged to file showing time since last frame acquired + - **Purpose**: Understand actual performance vs camera lag vs processing bottlenecks + - **What's measured**: + - Frame acquisition interval (target: 250ms for 4 fps) + - Total processing time per frame (OCR + validation + file I/O) + - Logged for both changed values (console) and unchanged values (file only) + - **Output examples**: + - Console: `SpO2=97%, HR=76 bpm [45ms]` (when value changes) + - Log file: `Frame #123: SpO2=97%, HR=76 bpm (no change) - processed in 42ms` + - Log file: `[TIMING] Frame acquired: +251ms since last (target: 250ms)` + - **Benefits**: + - Identify if processing time is exceeding 
250ms target + - See actual frame rate vs target 4 fps + - Distinguish between camera timestamp lag and processing delays + - Debug performance issues with concrete data + - This addresses confusion about camera timestamp lag vs actual processing performance + +### v3.44 (Save Thresholded PNGs, Not Colored JPGs) +- **Changed error frame format**: Now saves thresholded black/white PNG instead of colored JPG + - **Rationale**: Thresholded frames show exactly what the OCR sees, making debugging much more effective + - **Previous**: Saved raw colored camera frames that required manual processing to debug + - **Now**: Saves thresholded (binary) PNG showing the exact image fed to template matching + - **Processing**: Grayscale conversion → threshold at 128 → save as PNG + - **Filename format**: `raw_frames/thresh_YYYYMMDD-NNNNN.png` + - **Saved on**: + - Corruption detection (invalid template match) + - Unrecognized digits (negative values) + - Low confidence (first attempt) + - Low confidence after retry + - **Benefits**: + - Can replay frames directly through OCR pipeline + - See exactly what template matching is working with + - Identify missing templates or threshold issues immediately + - No need to manually preprocess frames for debugging + - PNG format (lossless) vs JPG (lossy with compression artifacts) + - **Storage**: ~50-100KB per frame vs 200-400KB for colored JPG + +### v3.43 (Fixed Timing + Removed Raw Frames) +- **Fixed frame acquisition timing**: 250ms timer now starts from frame ACQUISITION, not processing completion + - **Previous behavior**: Waited 250ms after processing finished (variable frame rate) + - **New behavior**: Waits 250ms from when frame was acquired (consistent 4 fps) + - **Implementation**: Set `lastProcessedTime = now` BEFORE returning frame, not after processing + - **Example**: If processing takes 50ms, next frame acquired at exactly 250ms from previous acquisition + - **Result**: Guaranteed exactly 4 fps regardless of processing time 
variations +- **Removed raw frame saving**: System no longer saves unprocessed camera frames + - **Rationale**: Thresholded digit images (step4) are already saved for all failures + - **What's saved on errors**: + - review/f{N}_spo2_digit1.png, digit2, digit3 (thresholded crops) + - review/f{N}_hr_digit1.png, digit2, digit3 (thresholded crops) + - review/f{N}_spo2_full.png, hr_full.png (labeled displays) + - review/f{N}_spo2_checks.png, hr_checks.png (visualization) + - **Removed**: + - raw_frames/ directory no longer created + - rawFrame parameter removed from processFrame() + - rawFrame cloning removed from processFrames() loop + - **Benefits**: Reduced memory usage, simpler code, less disk I/O + - Thresholded images are more useful for debugging than raw camera frames + +### v3.42 (Time-Based Frame Processing) +- **Changed from skip-based to time-based frame processing**: More reliable and precise timing + - **Previous approach**: Process every 4th frame (variable timing depending on stream fps) + - **New approach**: Process frames at exactly 4 fps (250ms minimum interval) + - **Implementation**: + - Track `lastProcessedTime` timestamp + - Only process frame if 250ms has elapsed since last processed frame + - Guarantees consistent 4 fps processing rate regardless of stream fps + - **Benefits**: + - Predictable, consistent processing rate + - Works correctly even if stream fps varies + - Simpler to understand and configure + - **Updated NewRTSPSource()**: Removed `skipCount` parameter, now uses hardcoded 250ms interval + - Console message: "šŸ“Š Processing frames at 4 fps (250ms minimum interval)" +- **Confirmed**: All step debug images only saved in DEBUG_MODE + - step1_original.png (only with /debug flag) + - step2_decolorized.png (only with /debug flag) + - step3_grayscale.png (only with /debug flag) + - step4_thresholded.png (only with /debug flag) + +### v3.41 (Critical Performance Fix) +- **CRITICAL: Fixed excessive debug image writes causing high resource 
usage** + - **Problem**: Saving 8 debug images PER FRAME (4 per display x 2 displays) + - **Impact**: At 3.75 fps = ~30 images/second = massive disk I/O and CPU usage + - **Images being saved on every frame**: + - step1_original.png (colored extraction) + - step2_decolorized.png (after alarm mode decolorization) + - step3_grayscale.png (after grayscale conversion) + - step4_thresholded.png (after binary threshold) + - **Fix**: Only save these debug images when DEBUG_MODE is enabled + - **Result**: Normal operation now only saves essential files (digit crops, checks, full) + - **To enable debug images**: Run with `./pulseox-monitor /debug` flag + - **Performance improvement**: ~70% reduction in disk I/O, significant CPU savings + - This was a leftover from debugging alarm mode decolorization that should never have been in production + +### v3.40 (Actionable Layout Detection Errors) +- **Enhanced layout detection error diagnostics**: Now saves debug visualization when "Only 1 digit display found" error occurs + - **Problem**: Error message was not actionable - couldn't see what boxes were detected or why they failed + - **Fix**: Save timestamped debug image showing all center boxes with labels + - **Debug visualization shows**: + - All boxes crossing 50% line (center region) + - Green boxes = qualified as digit displays (height >= 110px) + - Cyan boxes = rejected (too short) + - Box labels showing dimensions: "#0: H=XXpx OK" or "#1: H=XXpx TOO SHORT" + - Red line showing 50% Y position + - Summary at top: "Found: X center boxes, Y digit displays (need 2)" + - **Enhanced console error output**: + - Shows count of center boxes found + - Lists each box with dimensions and qualification status + - "āœ“ QUALIFIED" for boxes that passed height requirement + - "āœ— TOO SHORT (< 110px)" for boxes that were rejected + - Path to debug image: test_output/layout_error_YYYYMMDD_HHMMSS.jpg + - Makes debugging layout failures much faster - can immediately see what system detected + 
+### v3.39 (Fixed Frame Scaling Order) +- **CRITICAL FIX: Frame scaling now happens in correct order** + - **Problem**: Boxes were calculated on original frame, then frame was scaled, causing coordinate mismatch + - **Root cause**: Layout detection calculated boxes on unscaled frame, but processFrame() scaled the frame later + - **Fix**: Layout detection now scales frame IMMEDIATELY after calculating scale factor (Step 4) + - **Process flow**: + 1. detectScreenLayoutAreas() calculates scale from bounding box width + 2. Scales frame to 860px width using gocv.Resize() + 3. **Scales ALL coordinate arrays** (bounding box, allBoxes, significantBoxes) + 4. ALL subsequent processing (contour detection, box calculation) uses scaled frame AND scaled coordinates + 5. Returns boxes in scaled coordinates + scale factor + 6. processFrame() scales every frame using same scale factor + 7. Boxes and frames now match perfectly + - **Code changes**: + - layout_detection.go: Added frame scaling at Step 4 (after scale calculation) + - layout_detection.go: Scale all coordinate arrays (scaledBoundingBox, scaledAllBoxes, scaledSignificantBoxes) + - All debug visualizations now use scaled frame (step 5-15 images) + - Removed tryAcquireLayout() wrapper function (unnecessary abstraction) + - pulseox-monitor.go: Calls detectScreenLayoutAreas() directly, stores layout+scale in state + - processor.go: Keeps scaling logic in processFrame() (needed for every frame) + - **Benefits**: + - Boxes calculated on 860px frame match frames that are scaled to 860px + - No more coordinate mismatches between layout detection and OCR processing + - Simpler code flow - no useless wrapper functions + - Alarm box trimming (Steps 8-10) preserved and working correctly + - **Verified**: Bounding box width log will show ~860px after scaling (was variable before) +- **BUG FIX: Review entries now use correct frame numbers** + - **Problem**: Successful readings used `state.FrameCount` (always 0) instead of actual 
`frameNum` + - **Result**: All images referenced f0_* instead of actual frame numbers + - **Fix**: Changed 3 instances in processor.go to use `frameNum` parameter + - Affected: Invalid physiological values, successful readings, unstable readings + - Failed readings (corruption, unrecognized, low confidence) were already correct +- **OPTIMIZATION: Only save images for frames added to review** + - **Problem**: Every processed frame saved images, even if values didn't change + - **Result**: review/ folder filled with unused images from StatusNoChange frames + - **Fix**: Delete digit images when values haven't changed (StatusNoChange) + - Saves disk space and makes review folder only contain relevant frames + - Deleted files: f*_spo2_checks.png, f*_spo2_digit[1-3].png, f*_spo2_full.png, f*_hr_*.png + +### v3.38 (Dynamic Frame Normalization) +- **Implemented dynamic frame scaling based on bounding box width**: Handles unstable device position + - **Problem**: Pulse oximeter lays loose on bed, causing frame width to vary + - **Solution**: Measure bounding box width in Step 4 of layout detection + - **Process**: + 1. Calculate scale factor: `scale = 860 / boundingBoxWidth` + 2. Log bounding box width and scale factor to console + 3. Store scale factor in ProcessingState.LockedScale + 4. 
Apply scaling to every frame before OCR processing + - **Changes**: + - `detectScreenLayoutAreas()` now returns `(*ScreenLayout, float64, error)` - includes scale factor + - `tryAcquireLayout()` captures and stores scale in state + - `processFrame()` applies scaling if `state.LockedScale != 1.0` + - Logs: "šŸ“Š Bounding box width: Xpx, Scale factor: Y (target: 860px)" + - **Benefits**: + - Consistent 860px normalized width regardless of camera position/zoom + - Layout coordinates remain stable across frames + - OCR accuracy maintained even when device moves + +### v3.37 (Fixed Layout Detection Height Threshold) +- **Fixed digit display height threshold**: Changed from > 120px to >= 110px + - **Problem**: Layout detection was failing because digit displays were exactly 110-120px tall + - **Previous code**: Required height > 120px (too restrictive) + - **Fixed code**: Requires height >= 110px (matches MIN_BOX_HEIGHT constant) + - **Added debug logging**: Shows dimensions of each center box and whether it's accepted/rejected + - Helps diagnose layout detection failures + - Consistent with original design intent (MIN_BOX_HEIGHT = 110) + +### v3.36 (Unified Logging) +- **Consolidated all logging to use logMessage() function**: Replaced all fmt.Printf(), fmt.Println(), and inconsistent logging patterns + - **ocr.go**: Converted template loading messages to use logMessage() + - Template warnings → Console + Warning level + - Template loading success → Console + Info level + - **frame_source.go**: Converted frame processing info message + - "Processing every Nth frame" → Console + Info level + - **normalize.go**: Converted all debug output messages + - Decolorization progress → LogFile + Debug level + - Debug file saves → LogFile + Debug/Info level + - **All files now use consistent logging pattern**: + - Startup/shutdown → Console + Info + - Main readings → Both + Info + - Warnings/errors → Both + Warning/Error + - Debug messages → LogFile + Debug (respects DEBUG_MODE 
flag) + - Progress indicators → Console + Info + - **Benefits**: + - Single source of truth for logging behavior + - Consistent timestamp formatting across all logs + - Proper level-based filtering + - Clean separation of console vs file logging + - Debug mode properly controls debug output + +### v3.35 (Logging System Implementation) +- **Implemented logMessage() function in helpers.go**: New unified logging system + - Target: Console, LogFile, Both + - Level: Debug, Info, Warning, Error + - Single-letter prefixes (D, I, W, E) for clean output + - Automatic DEBUG_MODE filtering for debug messages + - Timestamp formatting: [HH:MM:SS.mmm] + - Eliminates need for checking if logger != nil throughout codebase + +### v3.34 (Correct Order + Debug Images) +- **Fixed order of layout detection**: Center region FIRST, digit displays SECOND + - **Correct process order**: + 1. Find ALL contours with RetrievalList + 2. Collect all significant boxes (>50px width or height) + 3. Calculate bounding box from ALL boxes (not just digit displays) + 4. Find 50% Y line from overall bounding box + 5. Filter to boxes crossing 50% line (center region) + 6. From center boxes, identify digit displays (height >= 110px) + 7. Find alarm icons (center boxes extending beyond digit displays) + 8. Trim digit displays based on alarm icons + 9. Split by center X + 10. Find rightmost X in each half (using box center) + 11. 
Create fixed-width boxes (280px) + - **Debug images at every step** saved to test_output/: + - layout_step1_grayscale.jpg + - layout_step2_threshold.jpg + - layout_step3_eroded.jpg + - layout_step4_all_boxes.jpg (all boxes >50px in green) + - layout_step5_center_boxes.jpg (boxes crossing 50% line in cyan, red line) + - layout_step6_digit_displays.jpg (digit displays in magenta) + - layout_step7_alarm_icons.jpg (alarm icons in yellow, if any found) + - layout_step8_trimmed_displays.jpg (trimmed displays in green) + - layout_step9_final_boxes.jpg (final SpO2/HR boxes with labels) + - Key fix: Bounding box calculated from ALL contours, not just digit displays + - This prevents missing digit displays that are outside early bounding calculations + +### v3.33 (Contour Debug Logic + Center Region - WRONG ORDER) +- **Implemented contour_debug.go logic combined with center region approach**: As instructed + - **Exact implementation of contour_debug.go detection:** + 1. Find ALL contours with RetrievalList + 2. First pass: Identify digit displays (height >= 110px) + 3. Second pass: Find alarm icons (boxes extending beyond digit displays on right) + 4. Trim digit displays: Find leftmost alarm icon edge that overlaps, set as right boundary + - **Combined with center region logic:** + 5. Calculate bounding box from trimmed digit displays + 6. Find 50% Y line + 7. Filter to displays crossing 50% line (center region) + 8. Split by center X + 9. Find rightmost X in each half (using box center to determine which half) + 10. 
Create fixed-width boxes (CUT_WIDTH = 280px) + - **Key differences from v3.32:** + - Works with TRIMMED digit displays (after alarm icon trimming) + - Alarm icons properly detected and used to trim display boundaries + - Follows exact contour_debug.go algorithm before applying center logic + - Diagnostic output shows trimming actions when alarm icons detected + +### v3.32 (Fixed Layout Detection - WRONG APPROACH) +- **Reverted to proven layout detection logic**: Fixed incorrect box detection from v3.30 + - **Problem in v3.30/v3.31**: Alarm icon detection was finding wrong boxes (yellow alarm square instead of digit displays) + - **Fix**: Restored working center region approach without alarm icon complications + - **Process**: + 1. Find all significant contours (>50px width or height) + 2. Calculate overall bounding box + 3. Filter to boxes crossing 50% Y line with height >= 110px (digit displays) + 4. Create center region from filtered boxes + 5. Split by center X + 6. Find rightmost X in each half using **box center** to determine which half + 7. Create fixed-width boxes (CUT_WIDTH = 280px) + - Key insight: Use box center (not box edge) to determine left vs right half + - Alarm icon trimming removed for now - adds complexity without clear benefit + - Focuses on reliable detection of actual digit display boxes + - Debug images from v3.31 retained for troubleshooting + +### v3.31 (Decolorization After Extraction) +- **Moved decolorization to after display area extraction**: Layout detection now works on original colored frames + - **Problem**: Layout detection was trying to threshold colored alarm backgrounds, failing to find displays + - **Fix**: Layout detection now works on original colored frame (grayscale + threshold) + - **Decolorization moved**: Now happens in `recognizeDisplayArea()` AFTER extracting the small display regions + - **Process flow**: + 1. Preprocess (crop + rotate) - original colored frame + 2. 
Detect layout on colored frame (grayscale → threshold → find contours) + 3. Extract display areas (280x200px) - still colored + 4. **Decolorize extracted regions** (only ~56K pixels per display) + 5. Continue with OCR (grayscale → threshold → template matching) + - **Debug images added**: Each processing step now saves debug images: + - `f{N}_{display}_step1_original.png` - Original extracted colored region + - `f{N}_{display}_step2_decolorized.png` - After decolorization + - `f{N}_{display}_step3_grayscale.png` - After grayscale conversion + - `f{N}_{display}_step4_thresholded.png` - After thresholding (ready for OCR) + - Allows visual verification of each processing step + - Decolorization only affects OCR, not layout detection + +### v3.30 (Simplified Layout Detection) +- **Simplified layout detection with alarm icon trimming**: Combined best of both approaches + - **Uses RetrievalList** to find ALL contours (including nested ones like alarm icons) + - **Single contour pass**: No more complicated center region extraction and re-detection + - **Alarm icon detection**: Finds boxes that extend beyond digit displays (clock icons, alarm indicators) + - **Smart trimming**: Trims BOTH SpO2 and HR displays if alarm icons found, otherwise uses regular boxes + - **Process**: + 1. Find all contours with RetrievalList + 2. Identify digit displays (height >= 110px) + 3. Find alarm icons (boxes extending beyond digit displays on the right) + 4. Trim digit displays based on leftmost alarm icon edge (if any) + 5. Split displays into left (SpO2) and right (HR) halves using center X + 6. Find rightmost X in each half for display boundaries + 7. 
Create fixed-width boxes (CUT_WIDTH = 280px) + - Much simpler and more reliable than previous double-contour approach + - Combines center region logic with direct contour approach from contour_debug.go + +### v3.29 (Correct Decolorization Scope) +- **Fixed decolorization scope**: Removed wasteful full-frame decolorization + - **Problem in v3.28**: Decolorizing entire 1390x2736 frame (3.8M pixels) took ~2000ms + - **Reality**: Decolorization only needed for small display regions during OCR + - **Correct flow**: + 1. Preprocess (crop + rotate) - original colored frame + 2. Detect layout on original frame (works fine with colored backgrounds) + 3. Extract display areas (280x200px each) + 4. Decolorize ONLY display regions in `recognizeDisplayArea()` (~100K pixels) + 5. Run OCR on decolorized regions + - Decolorization already exists in `ocr.go` at the right place (line ~281) + - No need for full-frame decolorization - layout detection works on colored frames + - **Performance**: ~40x faster (decolorizing 100K pixels vs 3.8M pixels) + +### v3.28 (Decolorization Timer - REVERTED) +- **Added decolorization timing**: Timer measures how long decolorizeFullFrame() takes + - Timer added after preprocessing in main processing loop + - Displays: "[TIMER] Decolorization took: Xms" + - **Restored decolorization to pipeline**: Was defined but not being called! 
+ - Decolorization now runs between preprocessing and layout detection + - Enables alarm mode support (colored backgrounds → black/white) + - Allows measurement and optimization of decolorization performance + - Frame lifecycle: raw → preprocess → decolorize → detect/process + +### v3.27 (Save ACTUAL Raw Frames) +- **CRITICAL FIX: Raw frames are now truly raw (unprocessed)** + - **Problem**: `raw_frames/raw_*.png` files were being saved AFTER preprocessing (rotation + crop) + - **Result**: When testing with "raw" frames, they were rotated AGAIN, making displays vertical instead of horizontal + - **Fix**: Clone raw frame immediately after reading from source (line 233) + - **Flow now**: + 1. Read frame from source (portrait orientation) + 2. Clone raw frame → keep for potential error saving + 3. Preprocess: crop timestamp + rotate 90° CW + 4. Decolorize + 5. Process + 6. IF error (corruption, low confidence, unrecognized) → save RAW clone + 7. Close raw frame clone + - Raw frames saved in `processor.go` at error points using the `rawFrame` parameter + - Raw frames are now truly unprocessed - can be used for testing by running through full pipeline + - Fixes issue where testing with raw frames would double-rotate them + +### v3.26 (Fixed Redundant Decolorization) +- **CRITICAL FIX: Eliminated 4x redundant decolorization calls**: Now decolorizes ONCE at top level only + - **Problem**: Frame was being decolorized 4 times for every layout detection: + 1. In `detectScreenWidth()` (2668x1458 - full frame) + 2. In `saveNormalizationDebug()` (2668x1458 - same frame again!) + 3. In `detectScreenLayoutAreas()` (2130x1164 - center region) + 4. In `detectScreenLayoutAreas()` center region (859x518 - smaller region) + - **Each call took 2-3 seconds** - total 8-12 seconds wasted per frame! 
+ - **Fix**: Removed ALL decolorization calls from normalize.go and layout_detection.go + - **Now**: Single decolorization in `pulseox-monitor.go` at line 259-267 + - **Result**: Frame processing time reduced from ~12 seconds to ~3 seconds + - Decolorized frame passed to all layout functions - they expect pre-decolorized input + - Simplified `decolorizeFullFrame()` output - removed progress indicators (10%, 20%, etc.) + - Added comments documenting that functions expect already-decolorized frames + +### v3.25 (Aggressive Debug Output) +- **Added extensive debug image saves**: Every processing step now saves an image to test_output/ + - step1_original.jpg - Frame after preprocessing (rotated) + - step2_decolorized.jpg - After decolorizeFullFrame() (should be black/white) + - step3_grayscale.jpg - After grayscale conversion + - step4_thresholded.jpg - After binary threshold +- **Added decolorization progress tracking**: + - Shows progress every 10% of rows processed + - Shows final statistics: white pixel count/percentage, black pixel count/percentage + - Helps diagnose if decolorization is working correctly +- **Purpose**: Debug why alarm mode decolorization appears to not be working + +### v3.24 (Fixed Layout Decolorization) +- **Fixed missing decolorization in layout detection**: Layout detection was still using colored frames + - Added `decolorizeFullFrame()` call in `detectScreenLayoutAreas()` function + - Previously: Only `detectScreenWidth()` was decolorizing, but `detectScreenLayoutAreas()` was not + - Problem: Layout detection was converting colored frame directly to grayscale + - Result: Alarm mode colored backgrounds prevented contour detection + - Fix: Decolorize BEFORE grayscale conversion in both contour detection passes: + 1. Initial contour detection (full frame) + 2. 
Center region contour detection + - Now: Layout detection works correctly in both normal and alarm modes + - This was the missing piece preventing alarm mode from working end-to-end + +### v3.23 (Higher White Threshold) +- **Increased white pixel detection threshold**: Changed from 200 to 240 for more selective white detection + - Updated in both `decolorizeFullFrame()` (normalize.go) and `decolorizeAlarmMode()` (ocr.go) + - Previous threshold (200): Too permissive, caught near-white/gray pixels + - New threshold (240): More selective, only truly white pixels (near 255) are preserved + - Logic: If RGB all > 240 → keep as white (255), else → convert to black (0) + - Rationale: Display digits are pure white (255), not gray, so higher threshold is more accurate + - Better separation between white digits/borders and colored alarm backgrounds + +### v3.22 (Decolorize Before Layout) +- **Moved decolorization before layout detection**: Alarm mode now works end-to-end + - New `decolorizeFullFrame()` function in normalize.go + - Applied BEFORE layout detection (in `detectScreenWidth()`) + - Previously: Decolorization only happened during OCR (too late) + - Now: Decolorization happens first, then layout detection sees clean white-on-black displays + - Fixes issue where colored alarm backgrounds prevented layout detection + - Layout detection flow: + 1. Decolorize full frame (yellow/cyan → black, white → white) + 2. Convert to grayscale + 3. Threshold at 170 + 4. 
Find contours + - Same decolorization also applied in `saveNormalizationDebug()` for accurate debug visualization + - Result: System can now detect and recognize displays in both normal and alarm modes + +### v3.21 (Preprocessing Debug) +- **Added debug output for preprocessing**: Prints frame dimensions before and after preprocessing + - Shows: "Before preprocess: WxH" and "After preprocess: WxH" + - Helps verify that rotation is actually happening + - Expected: Portrait (e.g., 1080x1920) → Landscape (e.g., 1920x1080) + - Temporary diagnostic output to verify preprocessFrame() is working + +### v3.20 (Unified Preprocessing) +- **Verified unified preprocessing path**: Both RTSP streaming and file test modes use identical preprocessing + - All frames go through `preprocessFrame()` function: + 1. Crop timestamp (top 68 pixels) + 2. Rotate 90° clockwise (portrait → landscape) + - Applied in unified `processFrames()` loop (line 250) + - Works for both `RTSPSource` and `FileSource` + - **Important**: Raw frames saved to `raw_frames/` are saved AFTER preprocessing + - Therefore: Raw frames are already rotated and ready for testing + - If testing with unrotated frames, they'll go through preprocessing automatically + - Ensures consistent behavior regardless of frame source + - Layout detection expects landscape orientation (SpO2 and HR side-by-side horizontally) + +### v3.19 (Alarm Mode Support) +- **Added alarm mode decolorization**: System now recognizes digits even when pulse oximeter is in alarm state + - New `decolorizeAlarmMode()` function converts colored alarm backgrounds to black + - Alarm mode displays: + - SpO2: Yellow/orange background with white digits + - HR: Cyan/blue background with white digits + - Decolorization logic: + - Pixels where ALL RGB values > 200 → kept as white (digits) + - All other pixels → converted to black (colored backgrounds, borders) + - Applied before grayscale conversion in OCR pipeline + - Enables normal thresholding and template 
matching to work correctly + - Previously: Colored backgrounds would confuse thresholding, causing OCR failures + - Now: Alarm mode digits recognized just as reliably as normal mode + - Example: "90" in yellow box → converted to "90" in black/white → OCR succeeds + +### v3.18 (Zero Validation) +- **Added '0' digit validation**: Prevents false '0' recognition by checking for characteristic center hole + - New `validateZero()` function checks for a hole at 50% Y (center) + - Scans horizontally from 30%-70% X looking for black pixels (hole) + - Wider horizontal range than '8' validation (30-70% vs 40-50%) since '0' hole is larger + - If validation fails, marks digit as invalid (-1) triggering retry/re-detection + - Applied to all three digit positions (digit1, digit2, digit3) + - Complements existing '8' validation (two holes at 30% and 70% Y) + - Helps prevent misrecognition of corrupted/partial digits or digits with missing centers as '0' + - Example log output: + ``` + validateZero: center hole @ y=50 (50%) x=15-35 (true) + ``` + +### v3.17 (Enhanced Cleanup) +- **Enhanced directory cleanup on startup**: Now cleans all output directories in streaming mode + - Previously: Only `review/` was cleaned on startup + - Now: Cleans `review/`, `raw_frames/`, and `test_output/` in streaming mode + - Single-frame test mode: No cleanup (preserves test output) + - Progress display: Shows each directory being cleaned with checkmarks + - Example output: + ``` + šŸ—‘ļø Cleaning output directories... + - review/... āœ“ + - raw_frames/... āœ“ + - test_output/... 
āœ“ + ``` + - Rationale: `raw_frames/` accumulates failed recognition frames over time; clean slate each run + - Rationale: `test_output/` contains debug visualizations that should be fresh per session + +### v3.16 (Append-Only HTML Review) +- **Implemented append-only HTML generation**: Review HTML is now written incrementally instead of all at once on shutdown + - HTML header written once at startup via `initReviewHTML()` + - Each frame entry appended immediately via `appendReviewEntry()` as it's processed + - Footer written on shutdown via `closeReviewHTML()` + - Browser handles missing closing tags gracefully, allowing live refresh + - Benefits: + - āœ… Live updates: refresh browser anytime to see latest frames + - āœ… No regeneration overhead: just one file write per frame + - āœ… Can run for hours without memory/performance issues + - āœ… No data loss if process crashes (all processed frames already written) + - All review entry points now consistently call `appendReviewEntry()`: + - Corruption detection + - Unrecognized digits + - Low confidence (both first attempt and retry) + - Invalid physiological values + - Successful readings + - Unstable readings (held for validation) + - Fixed duplicate `appendReviewEntry()` calls in corruption handler (was calling 3 times) + +### v3.15 (Fixed Eight Validation) +- **Fixed validateEight() probe positions**: Corrected hole detection based on actual measurements + - Issue: Was probing at 33% and 67% which hit the white digit and middle bar + - Fix: Now checks horizontal line segments at 30% and 70% height + - Scans X from 40% to 50% of width (10% range centered around middle) + - Looks for ANY black pixel (< 128) in that range = hole detected + - Much more robust than single pixel check - tolerates slight variations + - Holes are BLACK (empty space), digit is WHITE in thresholded image + - Prevents false rejection of valid "8" digits + +### v3.14 (Corruption Debug Output) +- **Added debug output for corruption cycles**: 
Helps diagnose repeated corruption detection + - When corruption is detected, saves debug files to `test_output/` with timestamp + - Saves normalized frame: `corruption_YYYYMMDD_HHMMSS_frameN_normalized.png` + - Saves layout visualization: `corruption_YYYYMMDD_HHMMSS_frameN_layout.jpg` + - Also includes all digit crops from review/ directory (already generated by OCR) + - Allows offline analysis when system gets stuck in corruption → re-detect → corruption cycles + - Log message: "šŸ’¾ Debug saved: YYYYMMDD_HHMMSS" + +### v3.13 (RTSP Reconnect Fix + Eight Validation) +- **Fixed segmentation fault on RTSP reconnect failure**: Prevents crash when stream reconnection fails + - Added null check before reading from stream + - Sets stream to nil and marks source as closed after failed reconnect + - Returns shouldContinue=false to stop processing instead of attempting to read from null stream + - Prevents SIGSEGV when VideoCapture is null +- **Added '8' digit validation**: Prevents false '8' recognition by checking for two characteristic holes + - New `validateEight()` function probes two specific regions (at 1/3 and 2/3 height) + - Checks if both regions are at least 60% black (indicating holes) + - If validation fails, marks digit as invalid (-1) triggering retry/re-detection + - Applied to all three digit positions (digit1, digit2, digit3) + - Helps prevent misrecognition of corrupted/partial digits as '8' + +### v3.12 (Physiological Value Validation) +- **Added minimum value threshold for Home Assistant posting**: Prevents posting physiologically impossible values + - Validation rule: Both SpO2 and HR must be >= 40 to post to Home Assistant + - Values below 40 are logged to file and added to review, but NOT posted to HASS + - Console displays: "āš ļø Values below 40 - not posting to HASS" + - Rationale: SpO2 or HR below 40 are not physiologically possible in living patients + - Prevents false alarms and invalid data in Home Assistant database + - Common scenarios 
triggering this: + - Pulse oximeter disconnected (shows dashes, may be misread as low numbers) + - Recognition errors resulting in single-digit values (0-9) + - Display corruption being partially recognized + +### v3.11 (Reduced Raw Frame Storage) +- **Raw frames now only saved on recognition failures**: Significantly reduces disk space usage + - Previously: Every processed frame saved to `raw_frames/` (all successes and failures) + - Now: Only saves frames when recognition fails (corruption, unrecognized, low confidence) + - Successful readings and unstable readings (held for validation) no longer saved + - Failure cases where frames are saved: + - āŒ Corrupted frames (invalid pattern matched) + - āŒ Unrecognized digits (negative values) + - āŒ Low confidence readings (both first attempt and retry) + - Typical result: ~90% reduction in raw frame storage for stable monitoring + - Failed frames still available for debugging and training data collection + +### v3.10 (Corruption Frame Number) +- **Added frame number to corruption log message**: Easier debugging of false corruption detections + - Message now shows: `[CORRUPTION] Frame #123 - Digit = -1 detected: SpO2(9,5) HR(7,-1) - skipping frame` + - Helps identify which specific frame triggered invalid pattern match + - Useful for reviewing frames in HTML output and approving/removing invalid templates + +### v3.9 (Simplified Digit Detection Arrays) +- **Simplified digit detection to use only arrays**: Removed all intermediate variables + - `D[1,2,3]` array stores cut positions for digit extraction + - `digitIsOne[1,2,3]` array stores detection results (is it a "1"?) + - Eliminated intermediate variables like digit1IsOne, digit2IsOne, digit3IsOne, middleStart, rightStart + - Code now directly uses array notation throughout: `digitIsOne[2]`, `D[3]`, etc. 
+ - Cleaner, more maintainable code with single source of truth +- **Fixed "13" recognition bug**: D1 position now correctly calculated from accumulated widths + - Loop accumulates widths: digit3 → digit2 → digit1 + - D1 check position = w - digit3Width - digit2Width - 5 + - Previously was checking at wrong position when only 2 digits present + +### v3.8 (Code Cleanup & Digit Detection Loop) +- **Added helpers.go**: New file with `logf()` helper function + - Eliminates repetitive `if logger != nil` checks throughout codebase + - Makes logging calls cleaner and more maintainable + - Applied across processor.go, validators.go, and ocr.go +- **Refactored digit detection to use loop**: Replaced repetitive digit checking code with elegant loop + - Uses arrays for digit names, detection results, and widths + - Accumulates width progressively (digit3 → digit2 → digit1) + - More maintainable and easier to understand + - Eliminates code duplication + +### v3.7 (3-Digit Value Calculation Fix) +- **Fixed 3-digit value calculation bug**: Removed score requirement for digit1 detection + - Issue: When `digit1IsOne` detected a "1" digit, code also required template match score > 50% + - Problem: digit1 region includes empty padding, causing low template match scores + - Result: 106 was calculated as 6, 103 as 3, 104 as 4 + - Fix: Trust `hasOneAt()` detection alone - if digit1IsOne is true, use 100 + num2*10 + num3 + - The `hasOneAt()` function already confirms there's a "1" pattern at the correct position + +### v3.6 (Millisecond Timestamps) +- **Added millisecond precision to timestamps**: Changed timestamp format from `15:04:05` to `15:04:05.000` + - Provides more precise timing for frame processing and reading logs + - Helpful for debugging timing issues and frame processing delays + - Format: `[03:03:17.245] SpO2=93%, HR=85 bpm` + +### v3.5 (Frame Rate + Timestamp Validation) +- **Frame rate adjustment**: Changed from 30fps to 15fps camera + - Adjusted processing from every 
6th frame to every 4th frame + - Processing rate: ~3.75fps (was ~5fps) + - Camera was struggling with 30fps, reduced to 15fps +- **Timestamp validation**: Added OCR check of camera timestamp + - Validates timestamp every 10 processed frames (~2.5 seconds) + - Warns if camera time differs by >3 seconds from server + - Uses optimized gosseract library (~22ms per check) + - **Optimizations**: Downscale 50%, grayscale, Otsu threshold, invert, PNG encode + - **Reused OCR client** for major speed boost (was 120ms, now 22ms warm) + - Silent when drift is within ±3 seconds + - Helps detect frozen/lagging streams +- **New file**: `timestamp_ocr.go` - Timestamp extraction and validation + +### v3.4 (Low Confidence Counter) +- **Added low confidence counter**: Tracks and displays frequency of low confidence frames + - New `LowConfidenceCount` field in ProcessingState + - Console displays count when it increments: "Low confidence (#X)" + - Helps identify camera/lighting issues needing attention + - Counter appears for both first detection and retry failures + +### v3.3 (Corruption Detection Fix) +- **Fixed corruption detection**: Direct check prevents invalid frames from reaching stability + - Changed from `validateCorruption()` wrapper to direct `IsCorrupted()` call + - ANY digit = -1 (invalid template match) now immediately returns StatusCorrupted + - Prevents invalid/corrupted frames from being marked as "unstable" + - Ensures clean separation: corruption → silent skip, low confidence → retry + +### v3.2 (Review & Signal Handling) +- **Fixed Ctrl+C handling**: Simplified signal handling to prevent segfaults + - Removed stream.Close() from signal handler (was causing SIGSEGV) + - Stream cleanup now happens naturally on program exit + - Sets closed flag only, checks happen between frame reads +- **Enhanced review.html**: Failed frames now show all digit images + - Corruption, low confidence, and unrecognized frames display digit crops + - Allows reviewing and approving digits 
from failed recognitions + - Red background distinguishes failed frames +- **Silenced corruption detection**: Invalid patterns work quietly + - Corrupted frames no longer appear in console or HTML + - Only logged to file for debugging purposes + - Keeps review focused on actionable items + +### v3.1 (Optimizations) +- **Removed FPS display**: Cleaned up console output + - No more "Stream FPS: X (avg over Y frames)" messages + - Simpler, focused console showing only readings and warnings +- **Negative value handling**: Unrecognized digits now trigger immediate retry + - SpO2 or HR < 0 (OCR returned -1) treated as low confidence + - Triggers immediate next frame read + layout re-detection + - Prevents false "unstable" warnings for blurry/unrecognized frames +- **Frame rate adjustment**: Changed from every 4th to every 6th frame + - ~5 fps processing rate from 30fps stream + - Better balance for 30fps camera (was tuned for 15fps) + +### v3.0 (Refactored) +- **Modular architecture**: Split monolithic pulse-monitor.go into focused modules + - processor.go: Frame processing orchestration + - validators.go: Validation logic + - frame_source.go: Abstracted sources (RTSP/file) + - types.go: Shared data structures + - normalize.go: Frame normalization +- **Smart escalation strategy**: Progressive failure handling + - 1st failure: Try next frame (0s wait) + - 2nd failure: Re-detect layout (0s wait) + - 3rd failure: Wait 10s + - 4th failure: Wait 30s + - 5th+ failures: Wait 60s + - Success resets counter +- **Interruptible sleep**: Ctrl+C works immediately during waits + - Checks `source.IsActive()` every second + - No more forced 60s waits before shutdown +- **ConsecutiveFailures counter**: Tracks problems across processing + - Incremented on layout failures, corruption, low confidence + - Reset on successful processing + - Enables intelligent escalation +- See V3_CHANGES.md for complete migration guide + +### v2.35 +- **Robust layout detection under varying light 
conditions** + - Changed Step 3 logic: instead of picking "2 largest boxes", find rightmost X in each half + - Splits detection area at X-center, finds max(rightX) for left half (SpO2) and right half (HR) + - Handles digit fragmentation: even if "95" breaks into "9" and "5" contours, still finds correct right edge + - Tracks Y range across all contours in each half for proper bounding +- **Raw frame archiving** + - Saves every processed frame to `raw_frames/raw_YYYYMMDD-NNNNN.png` (persistent directory) + - Enables offline testing and replay of detection algorithms + - Files are the rotated frames (after timestamp crop), exactly as fed to detection pipeline + - Not cleaned on restart - accumulates for historical analysis + +### v2.34 +- Every 4th frame processing (improved responsiveness) +- Simplified corruption handling (no immediate retry) +- Silent corruption detection (log only) +- Digit 3 detection offset: -8 pixels +- Invalid template: 2 patterns + +### Previous versions +- v2.33: Added hindsight validation for large deltas +- v2.32: Implemented low confidence retry with layout re-detection +- v2.31: Added invalid digit detection +- Earlier: Various contour detection and template matching improvements + +--- + +## Contact & Context + +**Development Environment:** macOS (local development and testing) +**Target Deployment:** Ubuntu 22.04 with Docker +**Home Assistant Version:** (check your setup) +**Camera:** (specify your RTSP camera model) + +**Project Location:** `/Users/johanjongsma/pulse-monitor/` + +--- + +## Quick Reference + +### Start monitoring +```bash +cd /Users/johanjongsma/pulse-monitor +./pulseox-monitor +``` + +### Stop and generate review +``` +Ctrl+C +# Opens review/review.html automatically +``` + +### Check logs +```bash +tail -f pulseox-monitor_*.log +``` + +### Approve training digit +```bash +# Move good digit image to training_digits/ +# Naming: {digit}_{variant}.png +``` + +### Mark as invalid/corruption +```bash +# Move bad pattern 
to training_digits/invalid/ +# Naming: descriptive name like "white_blob_2.png" +``` + +--- + +**Document Purpose:** This document serves as a comprehensive reference for understanding the project state, architecture, and reasoning. It should enable a new chat session (or developer) to quickly understand what's been built, why decisions were made, and how to continue development. + +**Last Verified Working:** November 30, 2025 (v3.68 - Fixed double transform bug, OCR now working) diff --git a/backup.sh b/backup.sh new file mode 100755 index 0000000..0a015e7 --- /dev/null +++ b/backup.sh @@ -0,0 +1,62 @@ +#!/bin/bash + +# Backup script for pulse-monitor project +# Creates a backup directory with today's date and copies all relevant files + +DATE=$(date +%Y%m%d) +BACKUP_DIR="backup_${DATE}" + +# Check if backup already exists +if [ -d "$BACKUP_DIR" ]; then + echo "Backup directory $BACKUP_DIR already exists." + read -p "Overwrite? (y/n) " -n 1 -r + echo + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + echo "Backup cancelled." + exit 1 + fi + rm -rf "$BACKUP_DIR" +fi + +# Create backup directory +mkdir -p "$BACKUP_DIR" + +# Copy relevant files +echo "Creating backup in $BACKUP_DIR..." 
+ +# Go source files +cp -v *.go "$BACKUP_DIR/" 2>/dev/null + +# Markdown documentation +cp -v *.md "$BACKUP_DIR/" 2>/dev/null + +# Configuration files +cp -v *.yaml "$BACKUP_DIR/" 2>/dev/null +cp -v *.yml "$BACKUP_DIR/" 2>/dev/null + +# Shell scripts (except this one to avoid recursion issues) +for f in *.sh; do + if [ "$f" != "backup.sh" ]; then + cp -v "$f" "$BACKUP_DIR/" 2>/dev/null + fi +done + +# Build script +cp -v build.sh "$BACKUP_DIR/" 2>/dev/null + +# Makefile if exists +cp -v Makefile "$BACKUP_DIR/" 2>/dev/null + +# Go module files +cp -v go.mod "$BACKUP_DIR/" 2>/dev/null +cp -v go.sum "$BACKUP_DIR/" 2>/dev/null + +# Training digits directory +if [ -d "training_digits" ]; then + cp -rv training_digits "$BACKUP_DIR/" 2>/dev/null +fi + +echo "" +echo "āœ“ Backup complete: $BACKUP_DIR" +echo "Files backed up:" +ls -la "$BACKUP_DIR/" diff --git a/backups/backup_20251030_043342/BACKUP_CREATED.txt b/backups/backup_20251030_043342/BACKUP_CREATED.txt new file mode 100644 index 0000000..aec9818 --- /dev/null +++ b/backups/backup_20251030_043342/BACKUP_CREATED.txt @@ -0,0 +1,22 @@ +BACKUP CREATED: 2025-10-30 04:33:42 + +This backup contains all source files from pulse-monitor v2.35 before refactoring. + +Files that should be backed up: +- pulse-monitor.go (main application - 700+ lines) +- ocr.go (template matching) +- layout_detection.go (display detection) +- normalize.go (screen width normalization) +- config.go (configuration loading) +- homeassistant.go (HA posting) +- html_report.go (review HTML generation) +- PROJECT_STATE.md (comprehensive documentation) +- config.yaml (configuration file with credentials) + +To restore from this backup: +cp backups/backup_20251030_043342/*.go . +cp backups/backup_20251030_043342/PROJECT_STATE.md . +cp backups/backup_20251030_043342/config.yaml . + +The original source files are still intact at /Users/johanjongsma/pulse-monitor/ +This backup was created as a safety measure before major refactoring. 
diff --git a/backups/backup_20251030_043342/PROJECT_STATE.md b/backups/backup_20251030_043342/PROJECT_STATE.md new file mode 100644 index 0000000..1501f29 --- /dev/null +++ b/backups/backup_20251030_043342/PROJECT_STATE.md @@ -0,0 +1,1525 @@ +# PulseOx Monitor - Project State & Technical Documentation + +**Current Version:** v3.56 (Threshold AFTER Preprocessing) +**Last Updated:** November 15, 2025 +**Status:** Production - threshold after preprocessing, timestamp stays colored + +āš ļø **IMPORTANT:** Update VERSION constant in pulseox-monitor.go and this document with every build! + +**For AI Assistant:** You MUST increment the version number in both pulseox-monitor.go and this document whenever you make ANY code changes. This is not optional - it's a critical part of the workflow. + +--- + +## šŸ”§ For New Chat Sessions: Data Access + +**You have full filesystem access to `/Users/johanjongsma/pulse-monitor/`** + +Use these tools to work with the project: +- `Filesystem:read_file` - Read source code, logs, config files +- `Filesystem:write_file` - Create new files +- `Filesystem:edit_file` - Modify existing code (preferred for edits) +- `Filesystem:list_directory` - See what files exist +- `Filesystem:move_file` - Move/rename files (e.g., approving training digits) +- `Filesystem:search_files` - Find files by pattern + +**Key files you'll need:** +- Source: `pulseox-monitor.go`, `ocr.go`, `layout_detection.go`, etc. +- Config: `config.yaml` +- Training: `training_digits/*.png`, `training_digits/invalid/*.png` +- Output: `review/*.png`, `review/review.html` +- Logs: `pulseox-monitor_*.log` + +**DO NOT waste time asking how to access files - just use the filesystem tools directly!** + +--- + +## Project Overview + +This system monitors a Masimo Rad-G pulse oximeter by reading its display via RTSP camera feed. It uses computer vision (OpenCV/gocv) to perform OCR on the SpO2 and heart rate values, then posts readings to Home Assistant for tracking and alerting. 
+ +### Why This Exists +The Masimo pulse oximeter doesn't have a data output port, so we use a camera pointed at its display. The display has significant overexposure and light leakage issues, making traditional OCR (Tesseract) unreliable. We developed a custom template-matching approach that handles these challenging conditions. + +--- + +## Architecture + +### Main Components + +1. **pulseox-monitor.go** - Main application (v3.10) + - Unified frame processing loop + - Interruptible sleep for graceful shutdown + - Smart escalation strategy + - Signal handling (Ctrl+C) + +2. **processor.go** - Frame processor + - OCR execution + - Result validation + - Home Assistant posting + +3. **validators.go** - Validation handlers + - Corruption detection + - Confidence checking + - Stability validation (hindsight) + +4. **ocr.go** - Recognition engine + - Template matching for digits 0-9 + - Invalid pattern detection (corruption) + - `hasOneAt()` function for detecting narrow '1' digits + - Dynamic width calculation for 2-digit vs 3-digit displays + +5. **layout_detection.go** - Display locator + - Finds SpO2 and HR display areas in frame + - Position-based detection (rightmost edge method) + - Handles digit fragmentation + +6. **normalize.go** - Frame normalization + - Screen width detection + - Scaling to 860px width + - Layout detection with normalization + +7. **frame_source.go** - Frame sources + - RTSP streaming source + - Single-file test source + - Interruptible via `IsActive()` method + +8. **types.go** - Shared data structures + - ProcessingState with ConsecutiveFailures counter + - Reading, ProcessingResult types + - Status and Action enums + +9. **homeassistant.go** - Data posting + - REST API integration + - Posts only when values change + - Separate sensors: `sensor.pulse_ox_spo2`, `sensor.pulse_ox_hr` + +10. 
**html_report.go** - Review interface + - Dark-themed HTML generated on shutdown + - Shows all processed frames with confidence scores + - Visual indicators for unstable readings + +11. **timestamp_ocr.go** - Timestamp validation + - Extracts camera timestamp from frame + - Uses Tesseract OCR + - Validates against server time + - Warns if lag >10 seconds + +### Data Flow + +``` +RTSP Stream / File → Frame Capture (every 4th frame) → +Timestamp validation (every 10th frame) on **COLORED** frame → +Preprocessing (crop 68px top, rotate 90° CW) on **COLORED** frame → +**THRESHOLD ONCE at 240** (grayscale → binary) AFTER preprocessing → +Layout detection (find contours on binary) → +Display area extraction (280x200px binary regions) → +Template matching OCR on binary → +Digit validation (8 & 0) → +Exception handling → +Home Assistant posting +``` + +--- + +## Recognition System + +### Template Matching Approach + +We use a library of digit images extracted from the actual pulse oximeter display. Each digit (0-9) can have multiple template variants to handle slight variations. + +**Current templates:** +- Digits 0-9: Multiple variants per digit +- Invalid patterns: 2 templates in `training_digits/invalid/` + - `white_blob_1.png` + - `invalid_2.png` + +**Template location:** `/Users/johanjongsma/pulse-monitor/training_digits/` + +### Digit Width Detection + +The system handles both 2-digit (XX) and 3-digit (1XX) displays dynamically: + +- **Narrow '1' detection:** `hasOneAt(x)` checks if column X is 80%+ white and column X+10 is 80%+ black +- **Check positions:** + - Digit 3 (rightmost): `w-8` pixels from right edge + - Digit 2: Depends on digit 3 width (72px for '1', 100px otherwise) + - Digit 1: Depends on both digit 2 and 3 widths + +**Why -8 offset for digit 3?** +Originally used -5, but testing showed '1' digits needed detection slightly more to the left to catch the right edge reliably. 
+ +### Display Positioning + +- **SpO2 and HR displays:** 234 pixels apart vertically +- **Layout detection:** Run once at startup, locked unless re-detection triggered +- **Frame processing:** Every 4th frame (~3.75 fps from 15fps stream) +- **Timestamp check:** Every 10th processed frame (~2.5 seconds) + +--- + +## Exception Handling System + +The system has three independent exception handlers that run in sequence: + +### 1. Confirmed Corruption (Priority: Highest) +**Trigger:** Any digit recognized as `-1` (matched invalid template with >70% confidence) + +**Behavior:** +- Log to file (silent to console) +- Skip corrupted frame +- Continue to next frame through normal processing +- No special retry, no layout re-detection + +**Why this approach:** +Corruption is a recognition failure, not a camera/lighting issue. The next frame will naturally be clean. + +**Invalid templates:** +Stored in `training_digits/invalid/` - patterns like white blobs, UI elements, or corrupted digits that should never be interpreted as valid numbers. + +### 2. Low OCR Confidence +**Trigger:** Average confidence < 85% for SpO2 or HR, OR negative values (unrecognized digits) + +**Behavior:** +1. **First occurrence:** + - Log "āš ļø Low confidence, retrying with next frame..." or "[UNRECOGNIZED] SpO2=X, HR=Y" + - Read next frame immediately + - Re-detect layout (handles camera movement/vibration) + - Process retry frame + - If successful (high confidence + values changed): perform stability check and post + +2. **Second consecutive low confidence:** + - Log "āš ļø Low confidence after retry, pausing 2 seconds" + - Give up, wait 2 seconds before continuing + +**Why re-detect layout:** +Low confidence or unrecognized digits often indicate camera movement, lighting changes, focus issues, or temporary blur. Re-detecting layout helps recover from these transient problems. + +### 3. 
Large Delta (Hindsight Validation) +**Trigger:** Values changed >3 points from last stable reading (e.g., SpO2: 95→92 or HR: 70→75) + +**Behavior:** +1. Hold new reading as "pending" (don't post yet) +2. Store baseline (value before the spike) +3. Wait for next reading +4. **If next reading confirms trend** (moves in same direction): + - Post the held reading + - Post the confirming reading +5. **If next reading contradicts** (moves opposite direction or returns to baseline): + - Discard held reading as glitch + - Post the new reading + +**Why hindsight validation:** +Physiological changes are gradual. Sudden spikes >3 points are usually recognition glitches or transient display artifacts. This prevents false alarms while allowing real trends. + +**Example scenario:** +``` +Baseline: SpO2=95 +Reading 1: SpO2=91 (Ī”4) → HOLD, don't post +Reading 2: SpO2=89 (Ī”2 from held) → Trend confirmed, post 91 then 89 +``` + +vs. + +``` +Baseline: SpO2=95 +Reading 1: SpO2=91 (Ī”4) → HOLD, don't post +Reading 2: SpO2=95 (back to baseline) → Discard 91 as glitch, post 95 +``` + +--- + +## Key Design Decisions + +### 1. Change-Based Posting Only +**Decision:** Only post when SpO2 or HR values actually change. + +**Rationale:** +- Reduces Home Assistant database bloat +- Avoids unnecessary network traffic +- Still captures all meaningful changes + +### 2. Template Matching Over Tesseract OCR +**Decision:** Use custom template matching instead of Tesseract. + +**Rationale:** +- Pulse oximeter display has severe overexposure +- Tesseract unreliable with bright backgrounds +- Template matching: 90%+ accuracy vs. Tesseract: ~60% +- We have stable digit shapes - perfect for template matching + +### 3. Fixed Layout After Startup +**Decision:** Detect layout once, lock it, only re-detect on low confidence retry. 
+
+**Rationale:**
+- Camera is stationary (mounted on tripod)
+- Display position doesn't change
+- Layout detection is expensive (~10-20ms)
+- Re-detection only needed if camera moves/vibrates
+
+### 4. No Immediate Corruption Retry
+**Decision:** Don't immediately retry after detecting corruption; just skip to next frame.
+
+**Rationale:**
+- Corruption is a recognition issue, not camera issue
+- Next frame (133ms later at 15fps) will naturally be different
+- Avoids complexity of retry-within-retry scenarios
+- Simpler code, same reliability
+
+### 5. Processing Every 4th Frame
+**Decision:** Process every 4th frame (~3.75 fps from 15fps stream).
+
+**Rationale:**
+- Balance between responsiveness and CPU usage
+- SpO2/HR don't change faster than ~1 reading/second physiologically
+- Allows time for processing, posting, and stability checks
+- Adjusted for 15fps camera (was 6th frame for 30fps)
+- Can be adjusted: 3rd frame = ~5fps, 5th frame = ~3fps
+
+### 6. Dark Theme HTML Review
+**Decision:** Dark background with light text in review HTML.
+
+**Rationale:**
+- Easier on eyes during long review sessions
+- Better contrast for confidence scores
+- Color-coded indicators more visible
+
+### 7. No FPS Display
+**Decision:** Don't display stream FPS statistics.
+
+**Rationale:**
+- Stream FPS is constant (15fps from camera)
+- FPS logging adds noise to console output
+- Processing rate (every 4th frame) is more relevant than raw stream FPS
+- Keeps console focused on actual readings and warnings
+
+### 8. Digit Validation for '8' and '0'
+**Decision:** Validate recognized '8' and '0' digits by checking for characteristic holes. 
+ +**Rationale:** +- Template matching can falsely recognize corrupted/partial digits as '8' or '0' +- '8' validation: checks for TWO holes (at 30% and 70% Y, scanning 40-50% X) +- '0' validation: checks for ONE hole (at 50% Y, scanning 30-70% X) +- Wider scan range for '0' (30-70%) than '8' (40-50%) because '0' hole is larger +- If validation fails, digit marked as -1 (invalid) → triggers retry/re-detection +- Prevents false alarms from misrecognized digits +- Simple pixel-based validation is fast and reliable + +### 9. Alarm Mode Decolorization +**Decision:** Pre-process colored alarm backgrounds before OCR to normalize them to black/white. + +**Rationale:** +- Pulse oximeter displays colored backgrounds when in alarm mode (low SpO2 or abnormal HR) +- SpO2 alarm: Yellow/orange background, HR alarm: Cyan/blue background +- Normal thresholding fails with colored backgrounds +- Decolorization converts: white digits (RGB all > 240) → white (255), everything else → black (0) +- **Applied ONLY to display regions AFTER extraction** (in `recognizeDisplayArea()` function) +- **Layout detection works on original colored frames** - grayscale conversion handles colors fine for contour detection +- **Decolorization timing**: + 1. Layout detection: Original colored frame → grayscale → threshold → find contours (works fine) + 2. Extract display regions: Still colored (280x200px = 56K pixels) + 3. Decolorize extracted regions: Only ~100K pixels total (both displays) + 4. 
Continue with OCR: grayscale → threshold → template matching +- Each display region is ~280x200px = 56K pixels +- Decolorizing two display regions (~100K pixels) takes <10ms +- Threshold of 240 ensures only pure white pixels (digits/borders) are preserved, not grays +- No performance impact - simple pixel-wise operation on small regions +- Enables reliable digit recognition regardless of alarm state +- Critical for 24/7 monitoring where alarms are expected + +--- + +## Environmental Considerations + +### Light Leakage Issues +**Problem:** Ambient light creates false contours and affects brightness detection. + +**Solutions implemented:** +- Increased brightness threshold to 170 (from lower values) +- More selective contour filtering +- Height filters to exclude oversized boxes (<200px) + +**Manual mitigation:** +Position towel/cloth to block light from reflecting off display surface. + +### Camera Positioning +**Critical:** Camera must be positioned to minimize glare and ensure consistent framing. 
+ +**Current setup:** +- Tripod-mounted camera +- RTSP stream from network camera +- Fixed zoom/focus (no autofocus) + +--- + +## Files and Directories + +### Source Files +- `pulseox-monitor.go` - Main application +- `processor.go` - Frame processor +- `validators.go` - Validation handlers +- `ocr.go` - Recognition engine +- `layout_detection.go` - Display finder +- `normalize.go` - Frame normalization +- `frame_source.go` - Frame sources (RTSP/file) +- `types.go` - Shared data structures +- `helpers.go` - Utility functions (logf) +- `homeassistant.go` - API integration +- `html_report.go` - Review page generator +- `timestamp_ocr.go` - Timestamp validation +- `config.go` - Configuration loading + +### Configuration +- `config.yaml` - Camera URL and Home Assistant settings + +### Training Data +- `training_digits/*.png` - Valid digit templates (0-9) +- `training_digits/invalid/*.png` - Corruption patterns + +### Output +- `review/` - Frame captures and digit images (cleaned each run) + - `f{N}_boxes.jpg` - Visualization of detected areas + - `f{N}_{display}_checks.png` - Digit detection analysis + - `f{N}_{display}_digit[1-3].png` - Individual digit crops + - `f{N}_{display}_full.png` - Full display with recognized value + - `review.html` - Live-updated HTML (append-only) +- `raw_frames/` - Raw processed frames for failed recognitions (cleaned each run) + - `raw_YYYYMMDD-NNNNN.png` - **TRUE raw frames** (unprocessed, portrait orientation, with timestamp) + - Only saved when recognition fails (corruption, low confidence, unrecognized) + - Used for debugging and training data collection + - **Can be tested directly**: `./pulseox-monitor raw_frames/raw_*.png` (will go through full preprocessing pipeline) +- `test_output/` - Debug visualizations (cleaned each run) + - Layout detection debug files + - Corruption debug snapshots with timestamps + +### Logs +- `pulse-monitor_YYYYMMDD_HHMMSS.log` - Detailed execution log + - Console shows only: value changes, warnings, 
errors
+  - File logs: all frames, timing, confidence scores, decisions
+
+---
+
+## Development Workflow
+
+### Version Management
+
+**CRITICAL: Update version with every build!**
+
+**For AI Assistant (Claude):**
+- ALWAYS increment version number when making ANY code changes
+- Update BOTH pulseox-monitor.go and PROJECT_STATE.md
+- Add entry to Version History section documenting what changed
+- This is MANDATORY, not optional
+
+**Version increment process:**
+
+1. **Before every build:**
+   - Open `pulseox-monitor.go`
+   - Update `const VERSION = "v3.XX"` (increment version number)
+   - Document what changed in version history section below
+
+2. **After significant changes:**
+   - Update this PROJECT_STATE.md document
+   - Keep architecture, design decisions, and troubleshooting current
+   - Update "Last Verified Working" date at bottom
+
+3. **Commit discipline:**
+   - Version bump = code change
+   - Every feature/fix gets a version increment
+   - Keep PROJECT_STATE.md in sync with code
+
+### Adding New Training Digits
+
+When you find a good digit image in the review files:
+
+```bash
+# Example: Approve f768_hr_digit3.png as digit "1"
+# Creates training_digits/1_2.png (next available variant)
+```
+
+Current naming: `{digit}_{variant}.png` (e.g., `7_1.png`, `7_2.png`)
+
+### Adding Invalid Patterns
+
+When you find corrupted/glitchy images:
+
+```bash
+# Move to invalid folder with descriptive name
+# Creates training_digits/invalid/invalid_X.png
+```
+
+### Testing Changes
+
+1. Edit code on Mac
+2. Compile: `./build.sh`
+3. Run: `./pulseox-monitor`
+4. Monitor console for value changes and warnings
+5. Check log file for detailed diagnostics
+6. 
Review HTML on shutdown (Ctrl+C) for visual verification + +### Performance Tuning + +**Frame rate:** +- Change `skipCount` parameter in NewRTSPSource call (currently 4) +- Located in pulse-monitor-new.go +- Values: 3=~5fps, 4=~3.75fps, 5=~3fps (for 15fps stream) + +**Confidence threshold:** +- Currently 85% for high confidence +- Located in multiple places in main loop +- Lower = more false positives, Higher = more retries + +**Delta threshold:** +- Currently 3 points for stability check +- Change in large delta handler +- Lower = stricter (more held readings), Higher = more lenient + +--- + +## Known Limitations + +1. **Camera-dependent:** Requires good camera positioning and lighting +2. **Display-specific:** Templates are specific to Masimo Rad-G display fonts +3. **No real-time HTML:** Review HTML only generated on shutdown +4. **Three-digit limitation:** Assumes HR displays as 1XX when >99, may need adjustment for other scenarios +5. **Brightness sensitivity:** Very bright ambient light can still cause issues + +--- + +## Future Enhancements (Planned) + +### Docker Deployment +**Target:** Ubuntu 22.04 server with Docker Compose + +**Requirements:** +- Dockerfile with Go + OpenCV +- Mount volumes: config.yaml, training_digits/, review/, logs/ +- Network access to RTSP stream and Home Assistant +- docker-compose.yml integration with existing HA setup + +**Considerations:** +- Image size: ~500MB-1GB with OpenCV +- Multi-stage build possible for smaller runtime image +- Review file access via scp or lightweight HTTP server + +### SQLite + Live HTML +**Goal:** Real-time progress viewing without stopping app + +**Approach:** +- Store frame data in SQLite as processed +- HTTP endpoint generates HTML on-demand from DB +- Access at `http://server:8080/review.html` anytime + +**Benefits:** +- No shutdown required to review progress +- Historical data queryable +- API potential for other tools + +### Adaptive Thresholds +**Idea:** Auto-adjust confidence and delta 
thresholds based on recent history + +**Example:** +- If 90% of last 100 frames >95% confidence → raise threshold to 90% +- If frequent false positives → increase delta threshold temporarily + +--- + +## Troubleshooting Guide + +### "No templates found for digit X" +**Problem:** Missing training data for a digit. + +**Solution:** Run system, review output, approve good images for that digit. + +### Frequent low confidence warnings +**Causes:** +- Camera moved/vibrated +- Lighting changed +- Focus drifted +- Display brightness changed + +**Solutions:** +- Check camera mounting +- Adjust lighting (block reflections) +- Re-focus camera if needed +- Restart to re-detect layout + +### False corruption detection +**Problem:** Valid digits matched as invalid. + +**Solution:** Review invalid templates, remove overly broad patterns. + +### Large delta causing unnecessary holds +**Problem:** Real physiological changes >3 points being held. + +**Solution:** +- Increase delta threshold (e.g., to 5) +- Or adjust hindsight validation logic +- Consider different thresholds for SpO2 vs HR + +### Values not posting to Home Assistant +**Checks:** +1. Home Assistant URL correct in config.yaml? +2. Network connectivity? +3. Sensors created in HA configuration? +4. Check log file for POST errors +5. Are values actually changing? 
(change-based posting only) + +--- + +## Performance Characteristics + +**Typical timing per frame (v3.48):** +- Frame acquisition: 150-250ms (camera waiting time) +- Preprocessing: 8-12ms +- Frame scaling: 3-5ms +- SpO2 recognition: 9-12ms +- HR recognition: 9-12ms +- Validation: 0-1ms +- File I/O: 0ms (normal), 40-50ms (failures only) +- Home Assistant POST: 5ms (when values change) +- **Total: 25-35ms per frame** (processing only, excluding camera wait) +- Timestamp check (every 10th frame): ~22ms (optimized with reused OCR client) + +**Debug mode (with /debug flag):** +- Additional file I/O: +40ms (saves all digit images) +- Total: 65-75ms per frame + +**With every 4th frame processing at 15fps:** +- ~3.75 readings per second maximum +- Actual posting rate: varies (only when values change) +- CPU usage: Low (single-threaded, mostly idle) +- Timestamp validation: Every ~2.5 seconds (~22ms each) + +--- + +## Version History + +### v3.56 (Current - Threshold AFTER Preprocessing) +- **CRITICAL FIX: Moved threshold to AFTER preprocessing**: Timestamp overlay was being thresholded! + - **Problem in v3.54-v3.55**: Threshold happened BEFORE preprocessing + - Colored frame → threshold → binary + - Timestamp extracted from binary frame (appeared as B&W) + - Preprocessed binary frame (cropped B&W timestamp) + - **Correct flow now**: + 1. Acquire colored frame + 2. Extract timestamp from **colored** frame (stays colored!) + 3. Preprocess colored frame (crop timestamp + rotate) + 4. **Threshold preprocessed frame** at 240 → binary + 5. Layout detection on binary + 6. 
OCR on binary + - **Why this is correct**: + - Timestamp area is removed BEFORE thresholding (stays colored) + - Only the actual display area (after crop/rotate) gets thresholded + - Single threshold at 240, happens once, at the right time + - **Impact**: Saved raw frames now show colored timestamp overlay (as expected) + +### v3.55 (Fixed Double Threshold Bug) +- **CRITICAL FIX: Removed duplicate threshold in saveThresholdedFrame()**: Frame was being thresholded TWICE + - **Problem**: saveThresholdedFrame() was doing grayscale conversion + threshold at 128 + - **But**: rawFrame parameter is ALREADY thresholded binary (from line 313 in pulseox-monitor.go) + - **Result**: Frames were being thresholded twice - once at 240, then again at 128 + - **Fix**: saveThresholdedFrame() now just saves the frame directly (no conversion, no threshold) + - **Impact**: Overlay/timestamp area should look consistent now + - This was the "extra" threshold causing B&W overlay to look different +- **Confirmed ONLY one threshold operation now**: + - pulseox-monitor.go line 292: Threshold at 240 (the ONLY threshold) + - layout_detection.go: No threshold (works on binary) + - ocr.go: No threshold in recognizeDisplayArea() (works on binary) + - processor.go: saveThresholdedFrame() just saves (no threshold) +- Note: ocr.go still has OLD deprecated recognizeDisplay() function with thresholds, but it's never called + +### v3.54 (Single Threshold Architecture - Had Double Threshold Bug) +- Attempted to threshold once at 240 after frame acquisition +- Removed internal thresholds from layout_detection.go and ocr.go +- BUT missed duplicate threshold in saveThresholdedFrame() → fixed in v3.55 + +### v3.53 (CPU Optimization - Thread Limiting) +- Removed duplicate ProcessedCount increment bug +- Limited OpenCV threads to 1 for minimal overhead + +### v3.52 (CPU Optimization Complete) +- **Cleaned up failed GPU acceleration attempts**: Removed AVFoundation backend code + - **Investigation findings**: 
GPU hardware acceleration for RTSP streams is not feasible with current OpenCV setup + - AVFoundation doesn't support network streams (RTSP), only local camera devices + - OpenCV's ffmpeg backend has VideoToolbox compiled in but doesn't auto-activate for RTSP + - Would require either: (1) patching OpenCV's videoio plugin in C++, (2) complete rewrite with different library, or (3) local transcoding proxy + - **Final optimization achieved**: CPU reduced from 50% to ~30% (40% reduction) + - Single-threaded OpenCV (gocv.SetNumThreads(1)) eliminated thread overhead + - Homebrew's OpenCV has AVFoundation/VideoToolbox support but only for local devices + - ~15% CPU for RTSP H.264 decoding (software) + ~15% CPU for OCR processing + - **Conclusion**: 30% CPU is acceptable for 24/7 monitoring, fan noise significantly reduced + +### v3.51 (Attempted GPU Acceleration - Reverted) +- **Attempted AVFoundation backend**: Failed for RTSP streams (network streams not supported) + +### v3.50 (Reduced CPU Overhead) +- **Added OpenCV thread limiting**: Dramatically reduced CPU usage and thread count + - **Problem**: OpenCV was creating 40 threads (one per CPU core) for small operations + - **Impact**: Excessive context switching, 65%+ CPU usage for simple 280x200px image operations + - **Fix**: Limited OpenCV to 1 thread with `gocv.SetNumThreads(1)` at startup + - **Rationale**: + - Our images are tiny (280x200px per display) + - We process sequentially, not in parallel + - Processing time <20ms - faster than thread spawning overhead + - Single-threaded = zero thread management overhead + - **Expected result**: + - Thread count drops from ~40 to ~5-8 (only Go runtime threads) + - CPU usage drops from 65% to <20% + - No performance loss (processing still <20ms per frame) + - **Startup message**: "šŸ”§ OpenCV threads limited to 1 (single-threaded for minimal overhead)" + - **Testing**: v3.50 with 2 threads showed 23 threads/48% CPU, so moved to 1 thread for further optimization + +### 
v3.49 (Fixed Frame Counter Bug) +- **CRITICAL FIX: Removed duplicate ProcessedCount increment**: Fixed skipped frame numbers + - **Problem**: `state.ProcessedCount++` was being called twice: + 1. In main loop (pulseox-monitor.go) - for every frame acquired + 2. In processor.go - when values changed and HASS posted + - **Result**: Every time values changed, the counter was incremented twice + - **Symptom**: Frame numbers would skip (e.g., #61, #62, skip #63, #64...) + - **Pattern**: Always skipped the frame number 2 positions after a HASS post + - **Fix**: Removed the duplicate increment in processor.go + - **Now**: Frame numbers increment consistently: #61, #62, #63, #64... + +### v3.48 (Major Performance Improvements) +- **CRITICAL: Moved digit image saves to DEBUG_MODE only**: Massive performance improvement + - **Problem**: OCR was saving 8 PNG files per frame (4 per display Ɨ 2 displays) + - checks.png visualization + - digit1.png, digit2.png, digit3.png (individual digits) + - full.png (labeled composite) + - **Impact**: ~5ms per PNG write Ɨ 8 files = 40ms of unnecessary file I/O per frame + - **Fix**: Wrapped all digit image saves in `if DEBUG_MODE` checks + - **Result**: OCR now takes ~9-12ms total (just template matching, no file I/O) + - **When needed**: Run with `/debug` flag to generate all digit images +- **Only save boxes.jpg on failures**: Reduced file I/O for successful frames + - Previously: Saved layout visualization (boxes.jpg) every time values changed (~51ms) + - Now: Only saves boxes.jpg when OCR fails (corruption, unrecognized, low confidence) + - Successful frames: No extra file I/O beyond what's needed +- **Performance comparison**: + - Before: Total ~80ms (Prep 10ms + Scale 4ms + OCR 20ms + FileIO 40ms + Valid 1ms + HASS 5ms) + - After: Total ~30ms (Prep 10ms + Scale 4ms + OCR 10ms + FileIO 0ms + Valid 1ms + HASS 5ms) + - **~60% faster processing** for normal operation +- **Debug workflow unchanged**: `/debug` flag still generates all 
images as before + +### v3.47 (Timing Instrumentation) +- **Added comprehensive timing measurements with /timing flag**: Enables detailed performance analysis + - **New /timing command line flag**: Activates timing mode (./pulseox-monitor /timing) + - **Horizontal timing table**: Shows timing breakdown for each frame + - Frame | Acquire | Prep | Scale | OCR_SpO2 | OCR_HR | Valid | FileIO | HASS | Total + - **Header printed every 20 frames**: Prevents scroll-back issues + - **Timing measurements**: + - **Acquire**: Frame acquisition from RTSP source - WAITING TIME (not processing) + - **Prep**: Preprocessing (crop timestamp + rotate 90°) + - **Scale**: Frame scaling to normalized width (if needed) + - **OCR_SpO2**: SpO2 display recognition (template matching) + - **OCR_HR**: HR display recognition (template matching) + - **Valid**: Validation checks (corruption, confidence, stability) - accumulated across all checks + - **FileIO**: Saving review images and debug files - accumulated across all file operations + - **HASS**: Home Assistant POST request (only when values change) + - **Total**: Wall-clock processing time (Prep + Scale + OCR + Valid + FileIO + logging overhead) + - **Note**: Acquire is separate (camera waiting time). Total may not equal sum due to logging/overhead. 
+ - **Purpose**: Identify performance bottlenecks (e.g., excessive file I/O) + - **Implementation**: + - Added TimingData struct in types.go + - Added TIMING_MODE global flag + - Added printTimingTable() function in helpers.go + - Added timing measurements throughout processFrame() and processFrames() + - Timing data passed through entire processing pipeline + - **Example output**: + ``` + Frame | Acquire | Prep | Scale | OCR_SpO2 | OCR_HR | Valid | FileIO | HASS | Total + ------|---------|------|-------|----------|--------|-------|--------|------|------- + #123 | 2ms | 6ms | 0ms | 4ms | 4ms | 2ms | 38ms | 5ms | 61ms + #124 | 2ms | 6ms | 0ms | 4ms | 4ms | 1ms | 39ms | 5ms | 61ms + ``` + - **Benefits**: + - Quickly identify slow operations + - Track performance trends over time + - Validate optimization efforts + - Debug unexpected delays + +### v3.46 (Fixed "1" Detection + Raw Frame Improvements) +- **CRITICAL FIX: Lowered "1" digit detection threshold from 90% to 85%** + - **Problem**: "1" digits with 87-89% confidence were being rejected, causing misrecognition + - **Example**: "71" was being cut incorrectly because the "1" wasn't detected (87.1% < 90% threshold) + - **Result**: Wrong width used (100px instead of 72px), throwing off all cut positions + - **Fix**: Threshold lowered to 85% in `hasOneAt()` function (ocr.go:196) + - **Impact**: More reliable detection of "1" digits, especially with slight variations in brightness/focus +- **Auto-enable DEBUG_MODE for file processing** + - When testing with a file (e.g., `./pulseox-monitor raw_frames/thresh_*.png`), DEBUG_MODE automatically enabled + - Eliminates need to add `/debug` flag manually for single frame testing + - Implementation: pulseox-monitor.go:65-66 in `runSingleFrameMode()` +- **Added comprehensive debug output for digit detection** + - Shows display width at start of detection + - For each digit (3→2→1): + - Check position being tested + - hasOneAt() extraction region and confidence score + - 
Whether detected as "1" or not + - Width being used (72px vs 100px) + - Running total width + - Final CUT position calculated + - Region extraction coordinates and widths for all three digits + - Makes debugging cut position issues trivial - can see exact logic flow +- **Fixed raw frame saving to be truly raw** + - **Previous**: Frames were saved AFTER preprocessing (rotated, cropped) + - **Now**: Frames saved BEFORE any processing (portrait, with timestamp, untouched) + - **Processing applied**: Only thresholded (grayscale → binary) to save space as PNG + - **Benefit**: Can test raw frames with full preprocessing pipeline + - **Implementation**: Clone frame immediately after reading, pass to processFrame() +- **Fixed duplicate logging in test mode** + - **Problem**: Messages with `Both` target were printed twice in test mode + - **Cause**: globalLogger was set to os.Stdout (same as console output) + - **Fix**: Set globalLogger = nil in test mode (pulseox-monitor.go:71) + - **Result**: Clean, non-duplicated output when testing with files + +### v3.45 (Added Processing Time Measurements) +- **Added comprehensive timing measurements**: Now shows how long each frame takes to process + - **Frame processing time**: Displayed in console output: `SpO2=97%, HR=76 bpm [45ms]` + - **Frame acquisition interval**: Logged to file showing time since last frame acquired + - **Purpose**: Understand actual performance vs camera lag vs processing bottlenecks + - **What's measured**: + - Frame acquisition interval (target: 250ms for 4 fps) + - Total processing time per frame (OCR + validation + file I/O) + - Logged for both changed values (console) and unchanged values (file only) + - **Output examples**: + - Console: `SpO2=97%, HR=76 bpm [45ms]` (when value changes) + - Log file: `Frame #123: SpO2=97%, HR=76 bpm (no change) - processed in 42ms` + - Log file: `[TIMING] Frame acquired: +251ms since last (target: 250ms)` + - **Benefits**: + - Identify if processing time is exceeding 
250ms target + - See actual frame rate vs target 4 fps + - Distinguish between camera timestamp lag and processing delays + - Debug performance issues with concrete data + - This addresses confusion about camera timestamp lag vs actual processing performance + +### v3.44 (Save Thresholded PNGs, Not Colored JPGs) +- **Changed error frame format**: Now saves thresholded black/white PNG instead of colored JPG + - **Rationale**: Thresholded frames show exactly what the OCR sees, making debugging much more effective + - **Previous**: Saved raw colored camera frames that required manual processing to debug + - **Now**: Saves thresholded (binary) PNG showing the exact image fed to template matching + - **Processing**: Grayscale conversion → threshold at 128 → save as PNG + - **Filename format**: `raw_frames/thresh_YYYYMMDD-NNNNN.png` + - **Saved on**: + - Corruption detection (invalid template match) + - Unrecognized digits (negative values) + - Low confidence (first attempt) + - Low confidence after retry + - **Benefits**: + - Can replay frames directly through OCR pipeline + - See exactly what template matching is working with + - Identify missing templates or threshold issues immediately + - No need to manually preprocess frames for debugging + - PNG format (lossless) vs JPG (lossy with compression artifacts) + - **Storage**: ~50-100KB per frame vs 200-400KB for colored JPG + +### v3.43 (Fixed Timing + Removed Raw Frames) +- **Fixed frame acquisition timing**: 250ms timer now starts from frame ACQUISITION, not processing completion + - **Previous behavior**: Waited 250ms after processing finished (variable frame rate) + - **New behavior**: Waits 250ms from when frame was acquired (consistent 4 fps) + - **Implementation**: Set `lastProcessedTime = now` BEFORE returning frame, not after processing + - **Example**: If processing takes 50ms, next frame acquired at exactly 250ms from previous acquisition + - **Result**: Guaranteed exactly 4 fps regardless of processing time 
variations +- **Removed raw frame saving**: System no longer saves unprocessed camera frames + - **Rationale**: Thresholded digit images (step4) are already saved for all failures + - **What's saved on errors**: + - review/f{N}_spo2_digit1.png, digit2, digit3 (thresholded crops) + - review/f{N}_hr_digit1.png, digit2, digit3 (thresholded crops) + - review/f{N}_spo2_full.png, hr_full.png (labeled displays) + - review/f{N}_spo2_checks.png, hr_checks.png (visualization) + - **Removed**: + - raw_frames/ directory no longer created + - rawFrame parameter removed from processFrame() + - rawFrame cloning removed from processFrames() loop + - **Benefits**: Reduced memory usage, simpler code, less disk I/O + - Thresholded images are more useful for debugging than raw camera frames + +### v3.42 (Time-Based Frame Processing) +- **Changed from skip-based to time-based frame processing**: More reliable and precise timing + - **Previous approach**: Process every 4th frame (variable timing depending on stream fps) + - **New approach**: Process frames at exactly 4 fps (250ms minimum interval) + - **Implementation**: + - Track `lastProcessedTime` timestamp + - Only process frame if 250ms has elapsed since last processed frame + - Guarantees consistent 4 fps processing rate regardless of stream fps + - **Benefits**: + - Predictable, consistent processing rate + - Works correctly even if stream fps varies + - Simpler to understand and configure + - **Updated NewRTSPSource()**: Removed `skipCount` parameter, now uses hardcoded 250ms interval + - Console message: "šŸ“Š Processing frames at 4 fps (250ms minimum interval)" +- **Confirmed**: All step debug images only saved in DEBUG_MODE + - step1_original.png (only with /debug flag) + - step2_decolorized.png (only with /debug flag) + - step3_grayscale.png (only with /debug flag) + - step4_thresholded.png (only with /debug flag) + +### v3.41 (Critical Performance Fix) +- **CRITICAL: Fixed excessive debug image writes causing high resource 
usage** + - **Problem**: Saving 8 debug images PER FRAME (4 per display x 2 displays) + - **Impact**: At 3.75 fps = ~30 images/second = massive disk I/O and CPU usage + - **Images being saved on every frame**: + - step1_original.png (colored extraction) + - step2_decolorized.png (after alarm mode decolorization) + - step3_grayscale.png (after grayscale conversion) + - step4_thresholded.png (after binary threshold) + - **Fix**: Only save these debug images when DEBUG_MODE is enabled + - **Result**: Normal operation now only saves essential files (digit crops, checks, full) + - **To enable debug images**: Run with `./pulseox-monitor /debug` flag + - **Performance improvement**: ~70% reduction in disk I/O, significant CPU savings + - This was a leftover from debugging alarm mode decolorization that should never have been in production + +### v3.40 (Actionable Layout Detection Errors) +- **Enhanced layout detection error diagnostics**: Now saves debug visualization when "Only 1 digit display found" error occurs + - **Problem**: Error message was not actionable - couldn't see what boxes were detected or why they failed + - **Fix**: Save timestamped debug image showing all center boxes with labels + - **Debug visualization shows**: + - All boxes crossing 50% line (center region) + - Green boxes = qualified as digit displays (height >= 110px) + - Cyan boxes = rejected (too short) + - Box labels showing dimensions: "#0: H=XXpx OK" or "#1: H=XXpx TOO SHORT" + - Red line showing 50% Y position + - Summary at top: "Found: X center boxes, Y digit displays (need 2)" + - **Enhanced console error output**: + - Shows count of center boxes found + - Lists each box with dimensions and qualification status + - "āœ“ QUALIFIED" for boxes that passed height requirement + - "āœ— TOO SHORT (< 110px)" for boxes that were rejected + - Path to debug image: test_output/layout_error_YYYYMMDD_HHMMSS.jpg + - Makes debugging layout failures much faster - can immediately see what system detected + 
+### v3.39 (Fixed Frame Scaling Order) +- **CRITICAL FIX: Frame scaling now happens in correct order** + - **Problem**: Boxes were calculated on original frame, then frame was scaled, causing coordinate mismatch + - **Root cause**: Layout detection calculated boxes on unscaled frame, but processFrame() scaled the frame later + - **Fix**: Layout detection now scales frame IMMEDIATELY after calculating scale factor (Step 4) + - **Process flow**: + 1. detectScreenLayoutAreas() calculates scale from bounding box width + 2. Scales frame to 860px width using gocv.Resize() + 3. **Scales ALL coordinate arrays** (bounding box, allBoxes, significantBoxes) + 4. ALL subsequent processing (contour detection, box calculation) uses scaled frame AND scaled coordinates + 5. Returns boxes in scaled coordinates + scale factor + 6. processFrame() scales every frame using same scale factor + 7. Boxes and frames now match perfectly + - **Code changes**: + - layout_detection.go: Added frame scaling at Step 4 (after scale calculation) + - layout_detection.go: Scale all coordinate arrays (scaledBoundingBox, scaledAllBoxes, scaledSignificantBoxes) + - All debug visualizations now use scaled frame (step 5-15 images) + - Removed tryAcquireLayout() wrapper function (unnecessary abstraction) + - pulseox-monitor.go: Calls detectScreenLayoutAreas() directly, stores layout+scale in state + - processor.go: Keeps scaling logic in processFrame() (needed for every frame) + - **Benefits**: + - Boxes calculated on 860px frame match frames that are scaled to 860px + - No more coordinate mismatches between layout detection and OCR processing + - Simpler code flow - no useless wrapper functions + - Alarm box trimming (Steps 8-10) preserved and working correctly + - **Verified**: Bounding box width log will show ~860px after scaling (was variable before) +- **BUG FIX: Review entries now use correct frame numbers** + - **Problem**: Successful readings used `state.FrameCount` (always 0) instead of actual 
`frameNum` + - **Result**: All images referenced f0_* instead of actual frame numbers + - **Fix**: Changed 3 instances in processor.go to use `frameNum` parameter + - Affected: Invalid physiological values, successful readings, unstable readings + - Failed readings (corruption, unrecognized, low confidence) were already correct +- **OPTIMIZATION: Only save images for frames added to review** + - **Problem**: Every processed frame saved images, even if values didn't change + - **Result**: review/ folder filled with unused images from StatusNoChange frames + - **Fix**: Delete digit images when values haven't changed (StatusNoChange) + - Saves disk space and makes review folder only contain relevant frames + - Deleted files: f*_spo2_checks.png, f*_spo2_digit[1-3].png, f*_spo2_full.png, f*_hr_*.png + +### v3.38 (Dynamic Frame Normalization) +- **Implemented dynamic frame scaling based on bounding box width**: Handles unstable device position + - **Problem**: Pulse oximeter lays loose on bed, causing frame width to vary + - **Solution**: Measure bounding box width in Step 4 of layout detection + - **Process**: + 1. Calculate scale factor: `scale = 860 / boundingBoxWidth` + 2. Log bounding box width and scale factor to console + 3. Store scale factor in ProcessingState.LockedScale + 4. 
Apply scaling to every frame before OCR processing + - **Changes**: + - `detectScreenLayoutAreas()` now returns `(*ScreenLayout, float64, error)` - includes scale factor + - `tryAcquireLayout()` captures and stores scale in state + - `processFrame()` applies scaling if `state.LockedScale != 1.0` + - Logs: "šŸ“Š Bounding box width: Xpx, Scale factor: Y (target: 860px)" + - **Benefits**: + - Consistent 860px normalized width regardless of camera position/zoom + - Layout coordinates remain stable across frames + - OCR accuracy maintained even when device moves + +### v3.37 (Fixed Layout Detection Height Threshold) +- **Fixed digit display height threshold**: Changed from > 120px to >= 110px + - **Problem**: Layout detection was failing because digit displays were exactly 110-120px tall + - **Previous code**: Required height > 120px (too restrictive) + - **Fixed code**: Requires height >= 110px (matches MIN_BOX_HEIGHT constant) + - **Added debug logging**: Shows dimensions of each center box and whether it's accepted/rejected + - Helps diagnose layout detection failures + - Consistent with original design intent (MIN_BOX_HEIGHT = 110) + +### v3.36 (Unified Logging) +- **Consolidated all logging to use logMessage() function**: Replaced all fmt.Printf(), fmt.Println(), and inconsistent logging patterns + - **ocr.go**: Converted template loading messages to use logMessage() + - Template warnings → Console + Warning level + - Template loading success → Console + Info level + - **frame_source.go**: Converted frame processing info message + - "Processing every Nth frame" → Console + Info level + - **normalize.go**: Converted all debug output messages + - Decolorization progress → LogFile + Debug level + - Debug file saves → LogFile + Debug/Info level + - **All files now use consistent logging pattern**: + - Startup/shutdown → Console + Info + - Main readings → Both + Info + - Warnings/errors → Both + Warning/Error + - Debug messages → LogFile + Debug (respects DEBUG_MODE 
flag) + - Progress indicators → Console + Info + - **Benefits**: + - Single source of truth for logging behavior + - Consistent timestamp formatting across all logs + - Proper level-based filtering + - Clean separation of console vs file logging + - Debug mode properly controls debug output + +### v3.35 (Logging System Implementation) +- **Implemented logMessage() function in helpers.go**: New unified logging system + - Target: Console, LogFile, Both + - Level: Debug, Info, Warning, Error + - Single-letter prefixes (D, I, W, E) for clean output + - Automatic DEBUG_MODE filtering for debug messages + - Timestamp formatting: [HH:MM:SS.mmm] + - Eliminates need for checking if logger != nil throughout codebase + +### v3.34 (Correct Order + Debug Images) +- **Fixed order of layout detection**: Center region FIRST, digit displays SECOND + - **Correct process order**: + 1. Find ALL contours with RetrievalList + 2. Collect all significant boxes (>50px width or height) + 3. Calculate bounding box from ALL boxes (not just digit displays) + 4. Find 50% Y line from overall bounding box + 5. Filter to boxes crossing 50% line (center region) + 6. From center boxes, identify digit displays (height >= 110px) + 7. Find alarm icons (center boxes extending beyond digit displays) + 8. Trim digit displays based on alarm icons + 9. Split by center X + 10. Find rightmost X in each half (using box center) + 11. 
Create fixed-width boxes (280px) + - **Debug images at every step** saved to test_output/: + - layout_step1_grayscale.jpg + - layout_step2_threshold.jpg + - layout_step3_eroded.jpg + - layout_step4_all_boxes.jpg (all boxes >50px in green) + - layout_step5_center_boxes.jpg (boxes crossing 50% line in cyan, red line) + - layout_step6_digit_displays.jpg (digit displays in magenta) + - layout_step7_alarm_icons.jpg (alarm icons in yellow, if any found) + - layout_step8_trimmed_displays.jpg (trimmed displays in green) + - layout_step9_final_boxes.jpg (final SpO2/HR boxes with labels) + - Key fix: Bounding box calculated from ALL contours, not just digit displays + - This prevents missing digit displays that are outside early bounding calculations + +### v3.33 (Contour Debug Logic + Center Region - WRONG ORDER) +- **Implemented contour_debug.go logic combined with center region approach**: As instructed + - **Exact implementation of contour_debug.go detection:** + 1. Find ALL contours with RetrievalList + 2. First pass: Identify digit displays (height >= 110px) + 3. Second pass: Find alarm icons (boxes extending beyond digit displays on right) + 4. Trim digit displays: Find leftmost alarm icon edge that overlaps, set as right boundary + - **Combined with center region logic:** + 5. Calculate bounding box from trimmed digit displays + 6. Find 50% Y line + 7. Filter to displays crossing 50% line (center region) + 8. Split by center X + 9. Find rightmost X in each half (using box center to determine which half) + 10. 
Create fixed-width boxes (CUT_WIDTH = 280px) + - **Key differences from v3.32:** + - Works with TRIMMED digit displays (after alarm icon trimming) + - Alarm icons properly detected and used to trim display boundaries + - Follows exact contour_debug.go algorithm before applying center logic + - Diagnostic output shows trimming actions when alarm icons detected + +### v3.32 (Fixed Layout Detection - WRONG APPROACH) +- **Reverted to proven layout detection logic**: Fixed incorrect box detection from v3.30 + - **Problem in v3.30/v3.31**: Alarm icon detection was finding wrong boxes (yellow alarm square instead of digit displays) + - **Fix**: Restored working center region approach without alarm icon complications + - **Process**: + 1. Find all significant contours (>50px width or height) + 2. Calculate overall bounding box + 3. Filter to boxes crossing 50% Y line with height >= 110px (digit displays) + 4. Create center region from filtered boxes + 5. Split by center X + 6. Find rightmost X in each half using **box center** to determine which half + 7. Create fixed-width boxes (CUT_WIDTH = 280px) + - Key insight: Use box center (not box edge) to determine left vs right half + - Alarm icon trimming removed for now - adds complexity without clear benefit + - Focuses on reliable detection of actual digit display boxes + - Debug images from v3.31 retained for troubleshooting + +### v3.31 (Decolorization After Extraction) +- **Moved decolorization to after display area extraction**: Layout detection now works on original colored frames + - **Problem**: Layout detection was trying to threshold colored alarm backgrounds, failing to find displays + - **Fix**: Layout detection now works on original colored frame (grayscale + threshold) + - **Decolorization moved**: Now happens in `recognizeDisplayArea()` AFTER extracting the small display regions + - **Process flow**: + 1. Preprocess (crop + rotate) - original colored frame + 2. 
Detect layout on colored frame (grayscale → threshold → find contours) + 3. Extract display areas (280x200px) - still colored + 4. **Decolorize extracted regions** (only ~56K pixels per display) + 5. Continue with OCR (grayscale → threshold → template matching) + - **Debug images added**: Each processing step now saves debug images: + - `f{N}_{display}_step1_original.png` - Original extracted colored region + - `f{N}_{display}_step2_decolorized.png` - After decolorization + - `f{N}_{display}_step3_grayscale.png` - After grayscale conversion + - `f{N}_{display}_step4_thresholded.png` - After thresholding (ready for OCR) + - Allows visual verification of each processing step + - Decolorization only affects OCR, not layout detection + +### v3.30 (Simplified Layout Detection) +- **Simplified layout detection with alarm icon trimming**: Combined best of both approaches + - **Uses RetrievalList** to find ALL contours (including nested ones like alarm icons) + - **Single contour pass**: No more complicated center region extraction and re-detection + - **Alarm icon detection**: Finds boxes that extend beyond digit displays (clock icons, alarm indicators) + - **Smart trimming**: Trims BOTH SpO2 and HR displays if alarm icons found, otherwise uses regular boxes + - **Process**: + 1. Find all contours with RetrievalList + 2. Identify digit displays (height >= 110px) + 3. Find alarm icons (boxes extending beyond digit displays on the right) + 4. Trim digit displays based on leftmost alarm icon edge (if any) + 5. Split displays into left (SpO2) and right (HR) halves using center X + 6. Find rightmost X in each half for display boundaries + 7. 
Create fixed-width boxes (CUT_WIDTH = 280px) + - Much simpler and more reliable than previous double-contour approach + - Combines center region logic with direct contour approach from contour_debug.go + +### v3.29 (Correct Decolorization Scope) +- **Fixed decolorization scope**: Removed wasteful full-frame decolorization + - **Problem in v3.28**: Decolorizing entire 1390x2736 frame (3.8M pixels) took ~2000ms + - **Reality**: Decolorization only needed for small display regions during OCR + - **Correct flow**: + 1. Preprocess (crop + rotate) - original colored frame + 2. Detect layout on original frame (works fine with colored backgrounds) + 3. Extract display areas (280x200px each) + 4. Decolorize ONLY display regions in `recognizeDisplayArea()` (~100K pixels) + 5. Run OCR on decolorized regions + - Decolorization already exists in `ocr.go` at the right place (line ~281) + - No need for full-frame decolorization - layout detection works on colored frames + - **Performance**: ~40x faster (decolorizing 100K pixels vs 3.8M pixels) + +### v3.28 (Decolorization Timer - REVERTED) +- **Added decolorization timing**: Timer measures how long decolorizeFullFrame() takes + - Timer added after preprocessing in main processing loop + - Displays: "[TIMER] Decolorization took: Xms" + - **Restored decolorization to pipeline**: Was defined but not being called! 
+ - Decolorization now runs between preprocessing and layout detection + - Enables alarm mode support (colored backgrounds → black/white) + - Allows measurement and optimization of decolorization performance + - Frame lifecycle: raw → preprocess → decolorize → detect/process + +### v3.27 (Save ACTUAL Raw Frames) +- **CRITICAL FIX: Raw frames are now truly raw (unprocessed)** + - **Problem**: `raw_frames/raw_*.png` files were being saved AFTER preprocessing (rotation + crop) + - **Result**: When testing with "raw" frames, they were rotated AGAIN, making displays vertical instead of horizontal + - **Fix**: Clone raw frame immediately after reading from source (line 233) + - **Flow now**: + 1. Read frame from source (portrait orientation) + 2. Clone raw frame → keep for potential error saving + 3. Preprocess: crop timestamp + rotate 90° CW + 4. Decolorize + 5. Process + 6. IF error (corruption, low confidence, unrecognized) → save RAW clone + 7. Close raw frame clone + - Raw frames saved in `processor.go` at error points using the `rawFrame` parameter + - Raw frames are now truly unprocessed - can be used for testing by running through full pipeline + - Fixes issue where testing with raw frames would double-rotate them + +### v3.26 (Fixed Redundant Decolorization) +- **CRITICAL FIX: Eliminated 4x redundant decolorization calls**: Now decolorizes ONCE at top level only + - **Problem**: Frame was being decolorized 4 times for every layout detection: + 1. In `detectScreenWidth()` (2668x1458 - full frame) + 2. In `saveNormalizationDebug()` (2668x1458 - same frame again!) + 3. In `detectScreenLayoutAreas()` (2130x1164 - center region) + 4. In `detectScreenLayoutAreas()` center region (859x518 - smaller region) + - **Each call took 2-3 seconds** - total 8-12 seconds wasted per frame! 
+ - **Fix**: Removed ALL decolorization calls from normalize.go and layout_detection.go + - **Now**: Single decolorization in `pulseox-monitor.go` at line 259-267 + - **Result**: Frame processing time reduced from ~12 seconds to ~3 seconds + - Decolorized frame passed to all layout functions - they expect pre-decolorized input + - Simplified `decolorizeFullFrame()` output - removed progress indicators (10%, 20%, etc.) + - Added comments documenting that functions expect already-decolorized frames + +### v3.25 (Aggressive Debug Output) +- **Added extensive debug image saves**: Every processing step now saves an image to test_output/ + - step1_original.jpg - Frame after preprocessing (rotated) + - step2_decolorized.jpg - After decolorizeFullFrame() (should be black/white) + - step3_grayscale.jpg - After grayscale conversion + - step4_thresholded.jpg - After binary threshold +- **Added decolorization progress tracking**: + - Shows progress every 10% of rows processed + - Shows final statistics: white pixel count/percentage, black pixel count/percentage + - Helps diagnose if decolorization is working correctly +- **Purpose**: Debug why alarm mode decolorization appears to not be working + +### v3.24 (Fixed Layout Decolorization) +- **Fixed missing decolorization in layout detection**: Layout detection was still using colored frames + - Added `decolorizeFullFrame()` call in `detectScreenLayoutAreas()` function + - Previously: Only `detectScreenWidth()` was decolorizing, but `detectScreenLayoutAreas()` was not + - Problem: Layout detection was converting colored frame directly to grayscale + - Result: Alarm mode colored backgrounds prevented contour detection + - Fix: Decolorize BEFORE grayscale conversion in both contour detection passes: + 1. Initial contour detection (full frame) + 2. 
Center region contour detection + - Now: Layout detection works correctly in both normal and alarm modes + - This was the missing piece preventing alarm mode from working end-to-end + +### v3.23 (Higher White Threshold) +- **Increased white pixel detection threshold**: Changed from 200 to 240 for more selective white detection + - Updated in both `decolorizeFullFrame()` (normalize.go) and `decolorizeAlarmMode()` (ocr.go) + - Previous threshold (200): Too permissive, caught near-white/gray pixels + - New threshold (240): More selective, only truly white pixels (near 255) are preserved + - Logic: If RGB all > 240 → keep as white (255), else → convert to black (0) + - Rationale: Display digits are pure white (255), not gray, so higher threshold is more accurate + - Better separation between white digits/borders and colored alarm backgrounds + +### v3.22 (Decolorize Before Layout) +- **Moved decolorization before layout detection**: Alarm mode now works end-to-end + - New `decolorizeFullFrame()` function in normalize.go + - Applied BEFORE layout detection (in `detectScreenWidth()`) + - Previously: Decolorization only happened during OCR (too late) + - Now: Decolorization happens first, then layout detection sees clean white-on-black displays + - Fixes issue where colored alarm backgrounds prevented layout detection + - Layout detection flow: + 1. Decolorize full frame (yellow/cyan → black, white → white) + 2. Convert to grayscale + 3. Threshold at 170 + 4. 
Find contours + - Same decolorization also applied in `saveNormalizationDebug()` for accurate debug visualization + - Result: System can now detect and recognize displays in both normal and alarm modes + +### v3.21 (Preprocessing Debug) +- **Added debug output for preprocessing**: Prints frame dimensions before and after preprocessing + - Shows: "Before preprocess: WxH" and "After preprocess: WxH" + - Helps verify that rotation is actually happening + - Expected: Portrait (e.g., 1080x1920) → Landscape (e.g., 1920x1080) + - Temporary diagnostic output to verify preprocessFrame() is working + +### v3.20 (Unified Preprocessing) +- **Verified unified preprocessing path**: Both RTSP streaming and file test modes use identical preprocessing + - All frames go through `preprocessFrame()` function: + 1. Crop timestamp (top 68 pixels) + 2. Rotate 90° clockwise (portrait → landscape) + - Applied in unified `processFrames()` loop (line 250) + - Works for both `RTSPSource` and `FileSource` + - **Important**: Raw frames saved to `raw_frames/` are saved AFTER preprocessing + - Therefore: Raw frames are already rotated and ready for testing + - If testing with unrotated frames, they'll go through preprocessing automatically + - Ensures consistent behavior regardless of frame source + - Layout detection expects landscape orientation (SpO2 and HR side-by-side horizontally) + +### v3.19 (Alarm Mode Support) +- **Added alarm mode decolorization**: System now recognizes digits even when pulse oximeter is in alarm state + - New `decolorizeAlarmMode()` function converts colored alarm backgrounds to black + - Alarm mode displays: + - SpO2: Yellow/orange background with white digits + - HR: Cyan/blue background with white digits + - Decolorization logic: + - Pixels where ALL RGB values > 200 → kept as white (digits) + - All other pixels → converted to black (colored backgrounds, borders) + - Applied before grayscale conversion in OCR pipeline + - Enables normal thresholding and template 
matching to work correctly + - Previously: Colored backgrounds would confuse thresholding, causing OCR failures + - Now: Alarm mode digits recognized just as reliably as normal mode + - Example: "90" in yellow box → converted to "90" in black/white → OCR succeeds + +### v3.18 (Zero Validation) +- **Added '0' digit validation**: Prevents false '0' recognition by checking for characteristic center hole + - New `validateZero()` function checks for a hole at 50% Y (center) + - Scans horizontally from 30%-70% X looking for black pixels (hole) + - Wider horizontal range than '8' validation (30-70% vs 40-50%) since '0' hole is larger + - If validation fails, marks digit as invalid (-1) triggering retry/re-detection + - Applied to all three digit positions (digit1, digit2, digit3) + - Complements existing '8' validation (two holes at 30% and 70% Y) + - Helps prevent misrecognition of corrupted/partial digits or digits with missing centers as '0' + - Example log output: + ``` + validateZero: center hole @ y=50 (50%) x=15-35 (true) + ``` + +### v3.17 (Enhanced Cleanup) +- **Enhanced directory cleanup on startup**: Now cleans all output directories in streaming mode + - Previously: Only `review/` was cleaned on startup + - Now: Cleans `review/`, `raw_frames/`, and `test_output/` in streaming mode + - Single-frame test mode: No cleanup (preserves test output) + - Progress display: Shows each directory being cleaned with checkmarks + - Example output: + ``` + šŸ—‘ļø Cleaning output directories... + - review/... āœ“ + - raw_frames/... āœ“ + - test_output/... 
✓
+ ```
+ - Rationale: `raw_frames/` accumulates failed recognition frames over time; clean slate each run
+ - Rationale: `test_output/` contains debug visualizations that should be fresh per session
+
+### v3.16 (Append-Only HTML Review)
+- **Implemented append-only HTML generation**: Review HTML is now written incrementally instead of all at once on shutdown
+ - HTML header written once at startup via `initReviewHTML()`
+ - Each frame entry appended immediately via `appendReviewEntry()` as it's processed
+ - Footer written on shutdown via `closeReviewHTML()`
+ - Browser handles missing closing tags gracefully, allowing live refresh
+ - Benefits:
+ - ✅ Live updates: refresh browser anytime to see latest frames
+ - ✅ No regeneration overhead: just one file write per frame
+ - ✅ Can run for hours without memory/performance issues
+ - ✅ No data loss if process crashes (all processed frames already written)
+ - All review entry points now consistently call `appendReviewEntry()`:
+ - Corruption detection
+ - Unrecognized digits
+ - Low confidence (both first attempt and retry)
+ - Invalid physiological values
+ - Successful readings
+ - Unstable readings (held for validation)
+ - Fixed duplicate `appendReviewEntry()` calls in corruption handler (was calling 3 times)
+
+### v3.15 (Fixed Eight Validation)
+- **Fixed validateEight() probe positions**: Corrected hole detection based on actual measurements
+ - Issue: Was probing at 33% and 67% which hit the white digit and middle bar
+ - Fix: Now checks horizontal line segments at 30% and 70% height
+ - Scans X from 40% to 50% of width (10% range centered around middle)
+ - Looks for ANY black pixel (< 128) in that range = hole detected
+ - Much more robust than single pixel check - tolerates slight variations
+ - Holes are BLACK (empty space), digit is WHITE in thresholded image
+ - Prevents false rejection of valid "8" digits
+
+### v3.14 (Corruption Debug Output)
+- **Added debug output for corruption cycles**:
Helps diagnose repeated corruption detection
+ - When corruption is detected, saves debug files to `test_output/` with timestamp
+ - Saves normalized frame: `corruption_YYYYMMDD_HHMMSS_frameN_normalized.png`
+ - Saves layout visualization: `corruption_YYYYMMDD_HHMMSS_frameN_layout.jpg`
+ - Also includes all digit crops from review/ directory (already generated by OCR)
+ - Allows offline analysis when system gets stuck in corruption → re-detect → corruption cycles
+ - Log message: "💾 Debug saved: YYYYMMDD_HHMMSS"
+
+### v3.13 (RTSP Reconnect Fix + Eight Validation)
+- **Fixed segmentation fault on RTSP reconnect failure**: Prevents crash when stream reconnection fails
+ - Added null check before reading from stream
+ - Sets stream to nil and marks source as closed after failed reconnect
+ - Returns shouldContinue=false to stop processing instead of attempting to read from null stream
+ - Prevents SIGSEGV when VideoCapture is null
+- **Added '8' digit validation**: Prevents false '8' recognition by checking for two characteristic holes
+ - New `validateEight()` function probes two specific regions (at 1/3 and 2/3 height)
+ - Checks if both regions are at least 60% black (indicating holes)
+ - If validation fails, marks digit as invalid (-1) triggering retry/re-detection
+ - Applied to all three digit positions (digit1, digit2, digit3)
+ - Helps prevent misrecognition of corrupted/partial digits as '8'
+
+### v3.12 (Physiological Value Validation)
+- **Added minimum value threshold for Home Assistant posting**: Prevents posting physiologically impossible values
+ - Validation rule: Both SpO2 and HR must be >= 40 to post to Home Assistant
+ - Values below 40 are logged to file and added to review, but NOT posted to HASS
+ - Console displays: "⚠️ Values below 40 - not posting to HASS"
+ - Rationale: SpO2 or HR below 40 are not physiologically possible in living patients
+ - Prevents false alarms and invalid data in Home Assistant database
+ - Common scenarios
triggering this:
+ - Pulse oximeter disconnected (shows dashes, may be misread as low numbers)
+ - Recognition errors resulting in single-digit values (0-9)
+ - Display corruption being partially recognized
+
+### v3.11 (Reduced Raw Frame Storage)
+- **Raw frames now only saved on recognition failures**: Significantly reduces disk space usage
+ - Previously: Every processed frame saved to `raw_frames/` (all successes and failures)
+ - Now: Only saves frames when recognition fails (corruption, unrecognized, low confidence)
+ - Successful readings and unstable readings (held for validation) no longer saved
+ - Failure cases where frames are saved:
+ - ❌ Corrupted frames (invalid pattern matched)
+ - ❌ Unrecognized digits (negative values)
+ - ❌ Low confidence readings (both first attempt and retry)
+ - Typical result: ~90% reduction in raw frame storage for stable monitoring
+ - Failed frames still available for debugging and training data collection
+
+### v3.10 (Corruption Frame Number)
+- **Added frame number to corruption log message**: Easier debugging of false corruption detections
+ - Message now shows: `[CORRUPTION] Frame #123 - Digit = -1 detected: SpO2(9,5) HR(7,-1) - skipping frame`
+ - Helps identify which specific frame triggered invalid pattern match
+ - Useful for reviewing frames in HTML output and approving/removing invalid templates
+
+### v3.9 (Simplified Digit Detection Arrays)
+- **Simplified digit detection to use only arrays**: Removed all intermediate variables
+ - `D[1,2,3]` array stores cut positions for digit extraction
+ - `digitIsOne[1,2,3]` array stores detection results (is it a "1"?)
+ - Eliminated intermediate variables like digit1IsOne, digit2IsOne, digit3IsOne, middleStart, rightStart
+ - Code now directly uses array notation throughout: `digitIsOne[2]`, `D[3]`, etc.
+ - Cleaner, more maintainable code with single source of truth +- **Fixed "13" recognition bug**: D1 position now correctly calculated from accumulated widths + - Loop accumulates widths: digit3 → digit2 → digit1 + - D1 check position = w - digit3Width - digit2Width - 5 + - Previously was checking at wrong position when only 2 digits present + +### v3.8 (Code Cleanup & Digit Detection Loop) +- **Added helpers.go**: New file with `logf()` helper function + - Eliminates repetitive `if logger != nil` checks throughout codebase + - Makes logging calls cleaner and more maintainable + - Applied across processor.go, validators.go, and ocr.go +- **Refactored digit detection to use loop**: Replaced repetitive digit checking code with elegant loop + - Uses arrays for digit names, detection results, and widths + - Accumulates width progressively (digit3 → digit2 → digit1) + - More maintainable and easier to understand + - Eliminates code duplication + +### v3.7 (3-Digit Value Calculation Fix) +- **Fixed 3-digit value calculation bug**: Removed score requirement for digit1 detection + - Issue: When `digit1IsOne` detected a "1" digit, code also required template match score > 50% + - Problem: digit1 region includes empty padding, causing low template match scores + - Result: 106 was calculated as 6, 103 as 3, 104 as 4 + - Fix: Trust `hasOneAt()` detection alone - if digit1IsOne is true, use 100 + num2*10 + num3 + - The `hasOneAt()` function already confirms there's a "1" pattern at the correct position + +### v3.6 (Millisecond Timestamps) +- **Added millisecond precision to timestamps**: Changed timestamp format from `15:04:05` to `15:04:05.000` + - Provides more precise timing for frame processing and reading logs + - Helpful for debugging timing issues and frame processing delays + - Format: `[03:03:17.245] SpO2=93%, HR=85 bpm` + +### v3.5 (Frame Rate + Timestamp Validation) +- **Frame rate adjustment**: Changed from 30fps to 15fps camera + - Adjusted processing from every 
6th frame to every 4th frame + - Processing rate: ~3.75fps (was ~5fps) + - Camera was struggling with 30fps, reduced to 15fps +- **Timestamp validation**: Added OCR check of camera timestamp + - Validates timestamp every 10 processed frames (~2.5 seconds) + - Warns if camera time differs by >3 seconds from server + - Uses optimized gosseract library (~22ms per check) + - **Optimizations**: Downscale 50%, grayscale, Otsu threshold, invert, PNG encode + - **Reused OCR client** for major speed boost (was 120ms, now 22ms warm) + - Silent when drift is within ±3 seconds + - Helps detect frozen/lagging streams +- **New file**: `timestamp_ocr.go` - Timestamp extraction and validation + +### v3.4 (Low Confidence Counter) +- **Added low confidence counter**: Tracks and displays frequency of low confidence frames + - New `LowConfidenceCount` field in ProcessingState + - Console displays count when it increments: "Low confidence (#X)" + - Helps identify camera/lighting issues needing attention + - Counter appears for both first detection and retry failures + +### v3.3 (Corruption Detection Fix) +- **Fixed corruption detection**: Direct check prevents invalid frames from reaching stability + - Changed from `validateCorruption()` wrapper to direct `IsCorrupted()` call + - ANY digit = -1 (invalid template match) now immediately returns StatusCorrupted + - Prevents invalid/corrupted frames from being marked as "unstable" + - Ensures clean separation: corruption → silent skip, low confidence → retry + +### v3.2 (Review & Signal Handling) +- **Fixed Ctrl+C handling**: Simplified signal handling to prevent segfaults + - Removed stream.Close() from signal handler (was causing SIGSEGV) + - Stream cleanup now happens naturally on program exit + - Sets closed flag only, checks happen between frame reads +- **Enhanced review.html**: Failed frames now show all digit images + - Corruption, low confidence, and unrecognized frames display digit crops + - Allows reviewing and approving digits 
from failed recognitions + - Red background distinguishes failed frames +- **Silenced corruption detection**: Invalid patterns work quietly + - Corrupted frames no longer appear in console or HTML + - Only logged to file for debugging purposes + - Keeps review focused on actionable items + +### v3.1 (Optimizations) +- **Removed FPS display**: Cleaned up console output + - No more "Stream FPS: X (avg over Y frames)" messages + - Simpler, focused console showing only readings and warnings +- **Negative value handling**: Unrecognized digits now trigger immediate retry + - SpO2 or HR < 0 (OCR returned -1) treated as low confidence + - Triggers immediate next frame read + layout re-detection + - Prevents false "unstable" warnings for blurry/unrecognized frames +- **Frame rate adjustment**: Changed from every 4th to every 6th frame + - ~5 fps processing rate from 30fps stream + - Better balance for 30fps camera (was tuned for 15fps) + +### v3.0 (Refactored) +- **Modular architecture**: Split monolithic pulse-monitor.go into focused modules + - processor.go: Frame processing orchestration + - validators.go: Validation logic + - frame_source.go: Abstracted sources (RTSP/file) + - types.go: Shared data structures + - normalize.go: Frame normalization +- **Smart escalation strategy**: Progressive failure handling + - 1st failure: Try next frame (0s wait) + - 2nd failure: Re-detect layout (0s wait) + - 3rd failure: Wait 10s + - 4th failure: Wait 30s + - 5th+ failures: Wait 60s + - Success resets counter +- **Interruptible sleep**: Ctrl+C works immediately during waits + - Checks `source.IsActive()` every second + - No more forced 60s waits before shutdown +- **ConsecutiveFailures counter**: Tracks problems across processing + - Incremented on layout failures, corruption, low confidence + - Reset on successful processing + - Enables intelligent escalation +- See V3_CHANGES.md for complete migration guide + +### v2.35 +- **Robust layout detection under varying light 
conditions** + - Changed Step 3 logic: instead of picking "2 largest boxes", find rightmost X in each half + - Splits detection area at X-center, finds max(rightX) for left half (SpO2) and right half (HR) + - Handles digit fragmentation: even if "95" breaks into "9" and "5" contours, still finds correct right edge + - Tracks Y range across all contours in each half for proper bounding +- **Raw frame archiving** + - Saves every processed frame to `raw_frames/raw_YYYYMMDD-NNNNN.png` (persistent directory) + - Enables offline testing and replay of detection algorithms + - Files are the rotated frames (after timestamp crop), exactly as fed to detection pipeline + - Not cleaned on restart - accumulates for historical analysis + +### v2.34 +- Every 4th frame processing (improved responsiveness) +- Simplified corruption handling (no immediate retry) +- Silent corruption detection (log only) +- Digit 3 detection offset: -8 pixels +- Invalid template: 2 patterns + +### Previous versions +- v2.33: Added hindsight validation for large deltas +- v2.32: Implemented low confidence retry with layout re-detection +- v2.31: Added invalid digit detection +- Earlier: Various contour detection and template matching improvements + +--- + +## Contact & Context + +**Development Environment:** macOS (local development and testing) +**Target Deployment:** Ubuntu 22.04 with Docker +**Home Assistant Version:** (check your setup) +**Camera:** (specify your RTSP camera model) + +**Project Location:** `/Users/johanjongsma/pulse-monitor/` + +--- + +## Quick Reference + +### Start monitoring +```bash +cd /Users/johanjongsma/pulse-monitor +./pulseox-monitor +``` + +### Stop and generate review +``` +Ctrl+C +# Opens review/review.html automatically +``` + +### Check logs +```bash +tail -f pulseox-monitor_*.log +``` + +### Approve training digit +```bash +# Move good digit image to training_digits/ +# Naming: {digit}_{variant}.png +``` + +### Mark as invalid/corruption +```bash +# Move bad pattern 
to training_digits/invalid/
+# Naming: descriptive name like "white_blob_2.png"
+```
+
+---
+
+**Document Purpose:** This document serves as a comprehensive reference for understanding the project state, architecture, and reasoning. It should enable a new chat session (or developer) to quickly understand what's been built, why decisions were made, and how to continue development.
+
+**Last Verified Working:** November 15, 2025 (v3.52 - CPU optimization complete, 30% usage)
diff --git a/backups/backup_20251030_043342/config.go b/backups/backup_20251030_043342/config.go
new file mode 100644
index 0000000..4c74768
--- /dev/null
+++ b/backups/backup_20251030_043342/config.go
@@ -0,0 +1,41 @@
+package main
+
+import (
+	"fmt"
+	"os"
+
+	"gopkg.in/yaml.v3"
+)
+
+// Config is the application configuration, mapped 1:1 onto config.yaml.
+// Sections: camera source (RTSP URL), Home Assistant endpoint + access token,
+// frame-processing knobs, and logging destination.
+type Config struct {
+	Camera struct {
+		RTSPURL string `yaml:"rtsp_url"`
+	} `yaml:"camera"`
+	HomeAssistant struct {
+		URL string `yaml:"url"`
+		Token string `yaml:"token"`
+	} `yaml:"home_assistant"`
+	Processing struct {
+		SampleInterval int `yaml:"sample_interval"`
+		ChangeThresholdPercent int `yaml:"change_threshold_percent"`
+		MaxDriftSeconds int `yaml:"max_drift_seconds"`
+	} `yaml:"processing"`
+	Logging struct {
+		Level string `yaml:"level"`
+		File string `yaml:"file"`
+	} `yaml:"logging"`
+}
+
+// LoadConfig reads the YAML file at filename and unmarshals it into a Config.
+// Read and parse errors are returned wrapped with %w. Note that no semantic
+// validation (required fields, URL syntax, value ranges) is performed here;
+// a zero value is returned for any key missing from the file.
+func LoadConfig(filename string) (*Config, error) {
+	data, err := os.ReadFile(filename)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read config file: %w", err)
+	}
+
+	var config Config
+	if err := yaml.Unmarshal(data, &config); err != nil {
+		return nil, fmt.Errorf("failed to parse config file: %w", err)
+	}
+
+	return &config, nil
+}
diff --git a/backups/backup_20251030_043342/config.yaml b/backups/backup_20251030_043342/config.yaml
new file mode 100644
index 0000000..edfe6fc
--- /dev/null
+++ b/backups/backup_20251030_043342/config.yaml
@@ -0,0 +1,25 @@
+# Pulse Oximeter Monitor Configuration
+
+camera:
+  # Tapo C110 RTSP URL (pulse ox monitoring camera)
+  # NOTE(review): the URL value embeds plaintext credentials committed to the
+  # repo — rotate them and inject via an environment variable or secrets file.
+  rtsp_url: 
"rtsp://tapohass:!!Helder06@192.168.2.183:554/stream1" + +home_assistant: + # TS140 server IP where HASS is hosted + url: "http://192.168.1.252:8123" + # Long-lived access token for pulse-monitor + token: "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiIzOTMxMTA4MjczYzI0NDU1YjIzOGJlZWE0Y2NkM2I1OCIsImlhdCI6MTc2MTExNTQxNywiZXhwIjoyMDc2NDc1NDE3fQ.URFS4M0rX78rW27gQuBX-PyrPYMLlGujF16jIBHXYOw" + +processing: + # How often to capture and process frames (seconds) + sample_interval: 1 + # Reject readings that change more than this percent + change_threshold_percent: 5 + # Maximum allowed drift INCREASE in seconds (detects camera lag/freeze) + # Camera can be 10s behind server consistently - that's fine + # But if drift increases by more than this, frame is stale + max_drift_seconds: 3 + +logging: + level: "info" # debug, info, warn, error + file: "./logs/pulse-monitor.log" \ No newline at end of file diff --git a/backups/backup_20251030_043342/homeassistant.go b/backups/backup_20251030_043342/homeassistant.go new file mode 100644 index 0000000..361fb29 --- /dev/null +++ b/backups/backup_20251030_043342/homeassistant.go @@ -0,0 +1,43 @@ +package main + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "net/http" + "time" +) + +func postToHomeAssistant(config *Config, entityID string, value int, unit string, friendlyName string) error { + url := fmt.Sprintf("%s/api/states/%s", config.HomeAssistant.URL, entityID) + payload := map[string]interface{}{ + "state": fmt.Sprintf("%d", value), + "attributes": map[string]interface{}{ + "unit_of_measurement": unit, + "friendly_name": friendlyName, + "device_class": "measurement", + }, + } + jsonData, err := json.Marshal(payload) + if err != nil { + return fmt.Errorf("marshal error: %w", err) + } + req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData)) + if err != nil { + return fmt.Errorf("request error: %w", err) + } + req.Header.Set("Authorization", "Bearer "+config.HomeAssistant.Token) + 
req.Header.Set("Content-Type", "application/json") + client := &http.Client{Timeout: 5 * time.Second} + resp, err := client.Do(req) + if err != nil { + return fmt.Errorf("HTTP error: %w", err) + } + defer resp.Body.Close() + if resp.StatusCode != 200 && resp.StatusCode != 201 { + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(body)) + } + return nil +} diff --git a/backups/backup_20251030_043342/html_report.go b/backups/backup_20251030_043342/html_report.go new file mode 100644 index 0000000..bdb0055 --- /dev/null +++ b/backups/backup_20251030_043342/html_report.go @@ -0,0 +1,281 @@ +package main + +import ( + "fmt" + "os" +) + +type ReviewEntry struct { + FrameNum int + Timestamp string + SpO2Value int + SpO2LeftDigit int + SpO2LeftConf float64 + SpO2RightDigit int + SpO2RightConf float64 + HRValue int + HRLeftDigit int + HRLeftConf float64 + HRRightDigit int + HRRightConf float64 + Failed bool + FailureReason string + Unstable bool + UnstableReason string +} + +// initReviewHTML creates the HTML file with header and opens the table +func initReviewHTML() error { + html := ` + + + + Pulse-Ox Recognition Review + + + + + + +

Pulse-Ox Recognition Review (Live)

+

Tip: Press Ctrl+F (or Cmd+F on Mac) and search for "CHECK" to find frames with low confidence (<85%)

+

Refresh page to see latest frames...

+ + + + + + + + +` + return os.WriteFile("review/review.html", []byte(html), 0644) +} + +// appendReviewEntry appends a single entry to the HTML file +func appendReviewEntry(e ReviewEntry) error { + confClass := func(conf float64) string { + if conf >= 85 { + return "conf-high" + } else if conf >= 70 { + return "conf-med" + } + return "conf-low" + } + + needsReview := e.SpO2LeftConf < 85 || e.SpO2RightConf < 85 || e.HRLeftConf < 85 || e.HRRightConf < 85 + reviewMarker := "" + if needsReview { + reviewMarker = " āš ļøCHECK" + } + + var rowHTML string + + if e.Failed { + // Failed recognition entry + rowHTML = fmt.Sprintf(` + + + + + + +`, e.FrameNum, reviewMarker, e.Timestamp, e.FrameNum, e.FrameNum, e.FailureReason, + e.SpO2Value, + e.FrameNum, e.FrameNum, + e.FrameNum, e.FrameNum, + e.FrameNum, e.FrameNum, e.SpO2LeftDigit, confClass(e.SpO2LeftConf), e.SpO2LeftConf, + e.FrameNum, e.FrameNum, e.SpO2RightDigit, confClass(e.SpO2RightConf), e.SpO2RightConf, + e.HRValue, + e.FrameNum, e.FrameNum, + e.FrameNum, e.FrameNum, + e.FrameNum, e.FrameNum, e.HRLeftDigit, confClass(e.HRLeftConf), e.HRLeftConf, + e.FrameNum, e.FrameNum, e.HRRightDigit, confClass(e.HRRightConf), e.HRRightConf) + } else { + // Successful recognition entry + unstableMarker := "" + if e.Unstable { + unstableMarker = fmt.Sprintf("
āš ļø %s", e.UnstableReason) + } + rowHTML = fmt.Sprintf(` + + + + + + +`, e.FrameNum, reviewMarker, unstableMarker, e.Timestamp, + e.FrameNum, e.FrameNum, + e.SpO2Value, + e.FrameNum, e.FrameNum, + e.FrameNum, e.FrameNum, + e.FrameNum, e.FrameNum, e.SpO2LeftDigit, confClass(e.SpO2LeftConf), e.SpO2LeftConf, + e.FrameNum, e.FrameNum, e.SpO2RightDigit, confClass(e.SpO2RightConf), e.SpO2RightConf, + e.HRValue, + e.FrameNum, e.FrameNum, + e.FrameNum, e.FrameNum, + e.FrameNum, e.FrameNum, e.HRLeftDigit, confClass(e.HRLeftConf), e.HRLeftConf, + e.FrameNum, e.FrameNum, e.HRRightDigit, confClass(e.HRRightConf), e.HRRightConf) + } + + // Open file in append mode + f, err := os.OpenFile("review/review.html", os.O_APPEND|os.O_WRONLY, 0644) + if err != nil { + return err + } + defer f.Close() + + _, err = f.WriteString(rowHTML) + return err +} + +// closeReviewHTML writes the closing tags +func closeReviewHTML() error { + footer := `
FrameTimeDetected BoxesSpO2 RecognitionHR Recognition
%d%s%s + + + āŒ FAILED: %s
+ SpO2: %d
+
+ +
+
+ + D1 +
+
+ + %d + (%.0f%%) +
+
+ + %d + (%.0f%%) +
+
+ HR: %d
+
+ +
+
+ + D1 +
+
+ + %d + (%.0f%%) +
+
+ + %d + (%.0f%%) +
+
%d%s%s%s + + + %d
+
+ +
+
+ + D1 +
+
+ + %d + (%.0f%%) +
+
+ + %d + (%.0f%%) +
+
+ %d
+
+ +
+
+ + D1 +
+
+ + %d + (%.0f%%) +
+
+ + %d + (%.0f%%) +
+
+

+ Color coding: + Green ≄85%, + Orange 70-84%, + Red <70% +

+

To add digits to training set, copy files like: cp review/f5_spo2_digit2.png training_digits/9_2.png

+ +` + + f, err := os.OpenFile("review/review.html", os.O_APPEND|os.O_WRONLY, 0644) + if err != nil { + return err + } + defer f.Close() + + _, err = f.WriteString(footer) + return err +} + +// writeReviewHTML - kept for backward compatibility (final shutdown) +func writeReviewHTML(entries []ReviewEntry) error { + // Just close the HTML properly - entries already appended + return closeReviewHTML() +} diff --git a/backups/backup_20251030_043342/layout_detection.go b/backups/backup_20251030_043342/layout_detection.go new file mode 100644 index 0000000..728de64 --- /dev/null +++ b/backups/backup_20251030_043342/layout_detection.go @@ -0,0 +1,743 @@ +package main + +import ( + "fmt" + "image" + "image/color" + "math" + "sort" + "time" + + "gocv.io/x/gocv" +) + +type Display struct { + IsSplit bool // true = one box split in half, false = two separate boxes + FullRect image.Rectangle // used when IsSplit=true + LeftRect image.Rectangle // used when IsSplit=false + RightRect image.Rectangle // used when IsSplit=false +} + +type ScreenLayout struct { + SpO2Area image.Rectangle + HRArea image.Rectangle +} + +func detectScreenLayoutAreas(rotated gocv.Mat) (*ScreenLayout, float64, error) { + // Correct order: Find center region FIRST (using all contours), THEN find digit displays + + startTime := time.Now() + + // Input is already thresholded binary image - work directly on it + // No grayscale, no threshold - just find contours + + // DEBUG: Save input to see what we're working with + if DEBUG_MODE { + gocv.IMWrite("test_output/layout_step1_input.jpg", rotated) + logMessage(LogFile, Debug, " [DEBUG] Saved layout_step1_input.jpg") + } + + // STEP 3: Apply erosion to separate nearby elements (5x5 kernel) - COMMENTED OUT + // step3Start := time.Now() + // kernel := gocv.GetStructuringElement(gocv.MorphRect, image.Pt(5, 5)) + // eroded := gocv.NewMat() + // gocv.Erode(thresh, &eroded, kernel) + // kernel.Close() + // defer eroded.Close() + // fmt.Printf(" [TIMING] Step 3 
(Erosion): %dms\n", time.Since(step3Start).Milliseconds()) + + // STEP 2: Find contours - use RetrievalList to get ALL contours including nested + step2Start := time.Now() + contours := gocv.FindContours(rotated, gocv.RetrievalList, gocv.ChainApproxSimple) + defer contours.Close() + + logMessage(Both, Info, " Found %d total contours", contours.Size()) + if contours.Size() == 0 { + logMessage(Both, Error, " ERROR: No contours found in thresholded frame") + logMessage(Both, Error, " Frame dimensions: %dx%d", rotated.Cols(), rotated.Rows()) + // Save debug image + gocv.IMWrite("test_output/error_input.jpg", rotated) + logMessage(Both, Error, " Saved debug image to test_output/error_input.jpg") + return nil, 0, nil + } + + logMessage(LogFile, Debug, " [TIMING] Step 2 (Find contours): %dms", time.Since(step2Start).Milliseconds()) + logMessage(LogFile, Debug, " [TIMING] Step 2 (Total): %dms", time.Since(startTime).Milliseconds()) + + // STEP 1: Collect ALL boxes (no filter) and print details + var allBoxes []image.Rectangle + for i := 0; i < contours.Size(); i++ { + rect := gocv.BoundingRect(contours.At(i)) + allBoxes = append(allBoxes, rect) + // fmt.Printf(" Box %d: X=%d-%d (W=%d), Y=%d-%d (H=%d)\n", + // i, rect.Min.X, rect.Max.X, rect.Dx(), + // rect.Min.Y, rect.Max.Y, rect.Dy()) + } + + if len(allBoxes) == 0 { + logMessage(Both, Error, " ERROR: No bounding boxes found from contours") + return nil, 0, nil + } + + logMessage(LogFile, Debug, " Found %d boxes (all, no filter)", len(allBoxes)) + logMessage(LogFile, Debug, "") + + // DEBUG: Skip step 6a and 6b visualizations + // rawContoursVis := rotated.Clone() + // ... + // allBoxesVis := rotated.Clone() + // ... 
+ + // STEP 2: Filter to significant boxes (width or height > 30px) for processing + var significantBoxes []image.Rectangle + for _, box := range allBoxes { + if box.Dx() > 30 || box.Dy() > 30 { + significantBoxes = append(significantBoxes, box) + } + } + + if len(significantBoxes) == 0 { + logMessage(Both, Error, " ERROR: No significant boxes found (>30px)") + logMessage(Both, Error, " All %d boxes were too small", len(allBoxes)) + return nil, 0, nil + } + + logMessage(LogFile, Debug, " Found %d significant boxes (>30px) for processing", len(significantBoxes)) + + // STEP 3: Calculate bounding box from significant boxes (not just digit displays) + minX := significantBoxes[0].Min.X + minY := significantBoxes[0].Min.Y + maxX := significantBoxes[0].Max.X + maxY := significantBoxes[0].Max.Y + + for _, box := range significantBoxes { + if box.Min.X < minX { + minX = box.Min.X + } + if box.Min.Y < minY { + minY = box.Min.Y + } + if box.Max.X > maxX { + maxX = box.Max.X + } + if box.Max.Y > maxY { + maxY = box.Max.Y + } + } + + boundingBox := image.Rect(minX, minY, maxX, maxY) + boundingBoxWidth := boundingBox.Dx() + logMessage(LogFile, Debug, " Bounding box from all boxes: X=%d-%d, Y=%d-%d, Width=%d", minX, maxX, minY, maxY, boundingBoxWidth) + + // Calculate required scale to normalize to 860px width + const TARGET_WIDTH = 860 + scale := float64(TARGET_WIDTH) / float64(boundingBoxWidth) + logMessage(Both, Info, " šŸ“Š Bounding box width: %dpx, Scale factor: %.3f (target: %dpx)", boundingBoxWidth, scale, TARGET_WIDTH) + + // STEP 4: Scale the frame to 860px width + step4Start := time.Now() + newWidth := int(float64(rotated.Cols()) * scale) + newHeight := int(float64(rotated.Rows()) * scale) + scaled := gocv.NewMat() + gocv.Resize(rotated, &scaled, image.Pt(newWidth, newHeight), 0, 0, gocv.InterpolationLinear) + defer scaled.Close() + logMessage(LogFile, Debug, " [TIMING] Step 4 (Scale frame): %dms", time.Since(step4Start).Milliseconds()) + logMessage(LogFile, Debug, " 
Scaled frame: %dx%d -> %dx%d", rotated.Cols(), rotated.Rows(), newWidth, newHeight) + + // CRITICAL: Scale the bounding box coordinates to match the scaled frame + scaledMinX := int(float64(minX) * scale) + scaledMaxX := int(float64(maxX) * scale) + scaledMinY := int(float64(minY) * scale) + scaledMaxY := int(float64(maxY) * scale) + scaledBoundingBox := image.Rect(scaledMinX, scaledMinY, scaledMaxX, scaledMaxY) + logMessage(LogFile, Debug, " Scaled bounding box: X=%d-%d, Y=%d-%d (from original: X=%d-%d, Y=%d-%d)", + scaledMinX, scaledMaxX, scaledMinY, scaledMaxY, minX, maxX, minY, maxY) + + // Scale all significant boxes coordinates too + var scaledSignificantBoxes []image.Rectangle + for _, box := range significantBoxes { + scaledBox := image.Rect( + int(float64(box.Min.X)*scale), + int(float64(box.Min.Y)*scale), + int(float64(box.Max.X)*scale), + int(float64(box.Max.Y)*scale), + ) + scaledSignificantBoxes = append(scaledSignificantBoxes, scaledBox) + } + + // Scale all boxes too (for visualization) + var scaledAllBoxes []image.Rectangle + for _, box := range allBoxes { + scaledBox := image.Rect( + int(float64(box.Min.X)*scale), + int(float64(box.Min.Y)*scale), + int(float64(box.Max.X)*scale), + int(float64(box.Max.Y)*scale), + ) + scaledAllBoxes = append(scaledAllBoxes, scaledBox) + } + + // All subsequent processing now works on the SCALED frame with SCALED coordinates + // This ensures boxes are calculated in scaled coordinates + + // STEP 5: Find 50% line + height := scaledBoundingBox.Dy() + line50 := scaledBoundingBox.Min.Y + height/2 + logMessage(LogFile, Debug, " 50%% line at Y=%d", line50) + + // DEBUG: Save step 5 visualization - bounding box and 50% line + bboxVis := scaled.Clone() + // Draw bounding box in blue + gocv.Rectangle(&bboxVis, scaledBoundingBox, color.RGBA{0, 100, 255, 255}, 3) + // Draw 50% line in red + gocv.Line(&bboxVis, image.Pt(scaledMinX, line50), image.Pt(scaledMaxX, line50), color.RGBA{255, 0, 0, 255}, 3) + // Add labels + 
gocv.PutText(&bboxVis, fmt.Sprintf("BBox: %d-%d, %d-%d", scaledMinX, scaledMaxX, scaledMinY, scaledMaxY), + image.Pt(scaledMinX+10, scaledMinY+30), + gocv.FontHersheyPlain, 1.2, color.RGBA{0, 100, 255, 255}, 2) + gocv.PutText(&bboxVis, fmt.Sprintf("50%% line: Y=%d", line50), + image.Pt(scaledMinX+10, line50-10), + gocv.FontHersheyPlain, 1.2, color.RGBA{255, 0, 0, 255}, 2) + gocv.IMWrite("test_output/layout_step5_bbox_and_line.jpg", bboxVis) + bboxVis.Close() + logMessage(LogFile, Debug, " [DEBUG] Saved layout_step5_bbox_and_line.jpg") + + // DEBUG: Save step 6 - all boxes with numbers + allBoxesVis := scaled.Clone() + for i, box := range scaledAllBoxes { + gocv.Rectangle(&allBoxesVis, box, color.RGBA{0, 255, 0, 255}, 2) + // Add box number + gocv.PutText(&allBoxesVis, fmt.Sprintf("%d", i), + image.Pt(box.Min.X+5, box.Min.Y+15), + gocv.FontHersheyPlain, 1.2, color.RGBA{0, 255, 0, 255}, 2) + } + gocv.IMWrite("test_output/layout_step6_all_boxes.jpg", allBoxesVis) + allBoxesVis.Close() + logMessage(LogFile, Debug, " [DEBUG] Saved layout_step6_all_boxes.jpg") + + // STEP 6: Filter to boxes crossing 50% line (center region) + var centerBoxes []image.Rectangle + for _, box := range scaledSignificantBoxes { + if box.Min.Y < line50 && box.Max.Y > line50 { + centerBoxes = append(centerBoxes, box) + } + } + + if len(centerBoxes) == 0 { + logMessage(Both, Error, " ERROR: No boxes crossing 50%%%% line") + logMessage(Both, Error, " 50%%%% line at Y=%d, checked %d significant boxes", line50, len(scaledSignificantBoxes)) + return nil, 0, nil + } + + logMessage(LogFile, Debug, " Found %d boxes crossing 50%% line (center region)", len(centerBoxes)) + + // DEBUG: Save step 7 - center boxes visualization + centerBoxesVis := scaled.Clone() + // Draw 50% line + gocv.Line(¢erBoxesVis, image.Pt(scaledMinX, line50), image.Pt(scaledMaxX, line50), color.RGBA{255, 0, 0, 255}, 3) + // Draw center boxes + for _, box := range centerBoxes { + gocv.Rectangle(¢erBoxesVis, box, color.RGBA{0, 255, 
255, 255}, 2) + } + gocv.IMWrite("test_output/layout_step7_center_boxes.jpg", centerBoxesVis) + centerBoxesVis.Close() + logMessage(LogFile, Debug, " [DEBUG] Saved layout_step7_center_boxes.jpg") + + // STEP 7: From center boxes, identify digit displays (height >= 110px) + var digitDisplays []image.Rectangle + for _, box := range centerBoxes { + logMessage(LogFile, Debug, " Center box: X=%d-%d (W=%d), Y=%d-%d (H=%d)", + box.Min.X, box.Max.X, box.Dx(), + box.Min.Y, box.Max.Y, box.Dy()) + if box.Dy() >= 110 { + digitDisplays = append(digitDisplays, box) + logMessage(LogFile, Debug, " āœ“ Added as digit display (H=%d >= 110)", box.Dy()) + } else { + logMessage(LogFile, Debug, " āœ— Skipped (H=%d < 110)", box.Dy()) + } + } + + if len(digitDisplays) == 0 { + logMessage(Both, Error, " ERROR: No digit displays found (height >= 110px) in center region") + logMessage(Both, Error, " Found %d center boxes, none met height requirement", len(centerBoxes)) + return nil, 0, nil + } + + logMessage(LogFile, Debug, " Found %d digit displays (height > 120px) BEFORE trimming", len(digitDisplays)) + + // DEBUG: Save step 8 - digit displays visualization + if DEBUG_MODE { + digitDisplaysVis := scaled.Clone() + for _, box := range digitDisplays { + gocv.Rectangle(&digitDisplaysVis, box, color.RGBA{255, 0, 255, 255}, 3) + } + gocv.IMWrite("test_output/layout_step8_digit_displays.jpg", digitDisplaysVis) + digitDisplaysVis.Close() + logMessage(LogFile, Debug, " [DEBUG] Saved layout_step8_digit_displays.jpg") + } + + // STEP 8: Get Y range from digit displays + minDigitY := digitDisplays[0].Min.Y + maxDigitY := digitDisplays[0].Max.Y + for _, box := range digitDisplays { + if box.Min.Y < minDigitY { + minDigitY = box.Min.Y + } + if box.Max.Y > maxDigitY { + maxDigitY = box.Max.Y + } + } + logMessage(LogFile, Debug, " Digit display Y range: %d-%d", minDigitY, maxDigitY) + + // STEP 9: Find ALL boxes (from scaledAllBoxes) within this Y range + var boxesInRange []image.Rectangle + for _, box := 
range scaledAllBoxes { + // Box overlaps with Y range if its bottom is below minDigitY and top is above maxDigitY + if box.Max.Y > minDigitY && box.Min.Y < maxDigitY { + boxesInRange = append(boxesInRange, box) + } + } + logMessage(LogFile, Debug, " Found %d boxes in Y range %d-%d", len(boxesInRange), minDigitY, maxDigitY) + + // DEBUG: Save step 9 - boxes in range visualization + boxesInRangeVis := scaled.Clone() + // Draw Y range lines + gocv.Line(&boxesInRangeVis, image.Pt(0, minDigitY), image.Pt(boxesInRangeVis.Cols(), minDigitY), color.RGBA{255, 0, 0, 255}, 2) + gocv.Line(&boxesInRangeVis, image.Pt(0, maxDigitY), image.Pt(boxesInRangeVis.Cols(), maxDigitY), color.RGBA{255, 0, 0, 255}, 2) + // Draw digit displays in magenta + for _, box := range digitDisplays { + gocv.Rectangle(&boxesInRangeVis, box, color.RGBA{255, 0, 255, 255}, 3) + } + // Draw all boxes in range in cyan + for _, box := range boxesInRange { + gocv.Rectangle(&boxesInRangeVis, box, color.RGBA{0, 255, 255, 255}, 1) + } + gocv.IMWrite("test_output/layout_step9_boxes_in_range.jpg", boxesInRangeVis) + boxesInRangeVis.Close() + logMessage(LogFile, Debug, " [DEBUG] Saved layout_step9_boxes_in_range.jpg") + + // STEP 10: Trim digit displays based on small boxes crossing their right edge + for i := range digitDisplays { + originalMaxX := digitDisplays[i].Max.X + newMaxX := originalMaxX + + // Check each box in range + for _, smallBox := range boxesInRange { + // Skip if this is the digit display itself + if smallBox == digitDisplays[i] { + continue + } + + // Check if small box crosses the right edge of this digit display + // Crosses if: smallBox.Min.X < digitDisplay.Max.X AND smallBox.Max.X > digitDisplay.Max.X + if smallBox.Min.X < digitDisplays[i].Max.X && smallBox.Max.X > digitDisplays[i].Max.X { + // Use the left edge of the small box as the new right edge + if smallBox.Min.X < newMaxX { + newMaxX = smallBox.Min.X + logMessage(LogFile, Debug, " Trimming digit display %d: right edge %d -> %d 
(small box at X=%d-%d)", + i, originalMaxX, newMaxX, smallBox.Min.X, smallBox.Max.X) + } + } + } + + digitDisplays[i].Max.X = newMaxX + } + + logMessage(LogFile, Debug, " Digit displays AFTER trimming: %d", len(digitDisplays)) + + // DEBUG: Save step 11 - trimmed digit displays visualization + trimmedVis := scaled.Clone() + for _, box := range digitDisplays { + gocv.Rectangle(&trimmedVis, box, color.RGBA{0, 255, 0, 255}, 4) + } + gocv.IMWrite("test_output/layout_step11_trimmed_displays.jpg", trimmedVis) + trimmedVis.Close() + logMessage(LogFile, Debug, " [DEBUG] Saved layout_step11_trimmed_displays.jpg") + + if len(digitDisplays) < 2 { + // Save debug visualization showing what was found + debugVis := scaled.Clone() + + // Draw 50% line + gocv.Line(&debugVis, image.Pt(scaledMinX, line50), image.Pt(scaledMaxX, line50), color.RGBA{255, 0, 0, 255}, 2) + gocv.PutText(&debugVis, fmt.Sprintf("50%% line: Y=%d", line50), + image.Pt(10, line50-10), gocv.FontHersheyPlain, 1.5, color.RGBA{255, 0, 0, 255}, 2) + + // Draw all center boxes with labels + for i, box := range centerBoxes { + boxColor := color.RGBA{0, 255, 255, 255} // Cyan for rejected + label := fmt.Sprintf("#%d: H=%d", i, box.Dy()) + + // Check if this box qualified as digit display + qualified := false + for _, dd := range digitDisplays { + if box == dd { + qualified = true + break + } + } + + if qualified { + boxColor = color.RGBA{0, 255, 0, 255} // Green for qualified + label += " OK" + } else if box.Dy() < 110 { + label += " TOO SHORT" + } + + gocv.Rectangle(&debugVis, box, boxColor, 3) + gocv.PutText(&debugVis, label, + image.Pt(box.Min.X+5, box.Min.Y+25), + gocv.FontHersheyPlain, 1.5, boxColor, 2) + } + + // Add summary at top + summary := fmt.Sprintf("Found: %d center boxes, %d digit displays (need 2)", + len(centerBoxes), len(digitDisplays)) + gocv.PutText(&debugVis, summary, + image.Pt(10, 30), gocv.FontHersheyDuplex, 1.0, color.RGBA{255, 255, 255, 255}, 2) + gocv.PutText(&debugVis, "Requirement: Height 
>= 110px", + image.Pt(10, 60), gocv.FontHersheyDuplex, 1.0, color.RGBA{255, 255, 255, 255}, 2) + + timestamp := time.Now().Format("20060102_150405") + errorFilename := fmt.Sprintf("test_output/layout_error_%s.jpg", timestamp) + gocv.IMWrite(errorFilename, debugVis) + debugVis.Close() + + // Enhanced error message + logMessage(Both, Error, " ERROR: Only %d digit display(s) found (need 2)", len(digitDisplays)) + logMessage(Both, Error, " ") + logMessage(Both, Error, " LIKELY CAUSE: Pulse oximeter is not centered under camera") + logMessage(Both, Error, " - Device may be at an angle or offset to one side") + logMessage(Both, Error, " - Only one display (SpO2 or HR) is in the center detection region") + logMessage(Both, Error, " ") + logMessage(Both, Error, " ACTION REQUIRED: Physically reposition camera or device") + logMessage(Both, Error, " - Move device to be centered and level under camera") + logMessage(Both, Error, " - Or adjust camera angle to capture both displays") + logMessage(Both, Error, " ") + logMessage(Both, Error, " Technical details:") + logMessage(Both, Error, " Center boxes found: %d (crossing 50%% line at Y=%d)", len(centerBoxes), line50) + for i, box := range centerBoxes { + qualified := "" + for _, dd := range digitDisplays { + if box == dd { + qualified = " āœ“ QUALIFIED" + break + } + } + if qualified == "" { + if box.Dy() < 110 { + qualified = " āœ— TOO SHORT (< 110px)" + } + } + logMessage(Both, Error, " Box #%d: W=%dpx, H=%dpx at X=%d-%d, Y=%d-%d%s", + i, box.Dx(), box.Dy(), box.Min.X, box.Max.X, box.Min.Y, box.Max.Y, qualified) + } + logMessage(Both, Error, " šŸ’¾ Debug image saved: %s", errorFilename) + logMessage(Both, Error, " Green boxes = qualified, Cyan = rejected") + return nil, 0, nil + } + + // STEP 11: Get Y range from digit displays + minCenterY := digitDisplays[0].Min.Y + maxCenterY := digitDisplays[0].Max.Y + + for _, box := range digitDisplays { + if box.Min.Y < minCenterY { + minCenterY = box.Min.Y + } + if box.Max.Y > 
maxCenterY { + maxCenterY = box.Max.Y + } + } + + // Create center region + centerRegion := image.Rect(scaledBoundingBox.Min.X, minCenterY, scaledBoundingBox.Max.X, maxCenterY) + + // STEP 12: Find X-center to split left/right + centerX := centerRegion.Min.X + centerRegion.Dx()/2 + logMessage(LogFile, Debug, " Center X: %d", centerX) + + // STEP 13: Find rightmost X and Y range in each half + spo2RightX := -1 + spo2MinY := 10000 + spo2MaxY := 0 + hrRightX := -1 + hrMinY := 10000 + hrMaxY := 0 + + for _, box := range digitDisplays { + // Determine which half this box belongs to based on its center + boxCenterX := box.Min.X + box.Dx()/2 + + if boxCenterX < centerX { + // Left half (SpO2) + if box.Max.X > spo2RightX { + spo2RightX = box.Max.X + } + if box.Min.Y < spo2MinY { + spo2MinY = box.Min.Y + } + if box.Max.Y > spo2MaxY { + spo2MaxY = box.Max.Y + } + } else { + // Right half (HR) + if box.Max.X > hrRightX { + hrRightX = box.Max.X + } + if box.Min.Y < hrMinY { + hrMinY = box.Min.Y + } + if box.Max.Y > hrMaxY { + hrMaxY = box.Max.Y + } + } + } + + if spo2RightX == -1 || hrRightX == -1 { + logMessage(Both, Error, " ERROR: Could not find displays in both halves") + logMessage(Both, Error, " Left half (SpO2): rightX=%d, Right half (HR): rightX=%d", spo2RightX, hrRightX) + return nil, 0, nil + } + + // STEP 14: Create boxes with fixed CUT_WIDTH + spo2LeftX := spo2RightX - CUT_WIDTH + spo2Box := image.Rect(spo2LeftX, spo2MinY, spo2RightX, spo2MaxY) + + hrLeftX := hrRightX - CUT_WIDTH + hrBox := image.Rect(hrLeftX, hrMinY, hrRightX, hrMaxY) + + logMessage(LogFile, Debug, " Final SpO2 box: X=%d-%d, Y=%d-%d", spo2Box.Min.X, spo2Box.Max.X, spo2Box.Min.Y, spo2Box.Max.Y) + logMessage(LogFile, Debug, " Final HR box: X=%d-%d, Y=%d-%d", hrBox.Min.X, hrBox.Max.X, hrBox.Min.Y, hrBox.Max.Y) + + // DEBUG: Save step 15 - final boxes visualization + finalVis := scaled.Clone() + gocv.Rectangle(&finalVis, spo2Box, color.RGBA{255, 0, 0, 255}, 4) + gocv.Rectangle(&finalVis, hrBox, 
color.RGBA{0, 255, 255, 255}, 4)
+	gocv.PutText(&finalVis, "SpO2", image.Pt(spo2Box.Min.X, spo2Box.Min.Y-10),
+		gocv.FontHersheyDuplex, 1.2, color.RGBA{255, 0, 0, 255}, 2)
+	gocv.PutText(&finalVis, "HR", image.Pt(hrBox.Min.X, hrBox.Min.Y-10),
+		gocv.FontHersheyDuplex, 1.2, color.RGBA{0, 255, 255, 255}, 2)
+	gocv.IMWrite("test_output/layout_step15_final_boxes.jpg", finalVis)
+	finalVis.Close()
+	logMessage(LogFile, Debug, " [DEBUG] Saved layout_step15_final_boxes.jpg")
+
+	logMessage(LogFile, Debug, " [TIMING] Total layout detection: %dms", time.Since(startTime).Milliseconds())
+
+	// Return layout and scale factor
+	return &ScreenLayout{
+		SpO2Area: spo2Box,
+		HRArea: hrBox,
+	}, scale, nil
+}
+
+// groupAdjacentContours pairs candidate contour rectangles into Display values:
+// two rects at similar height (Y difference <= 20px) whose horizontal gap lies
+// in [-50, 200) are treated as one side-by-side display (IsSplit=false); any
+// rect left unpaired becomes a single display that will be split in half
+// downstream (IsSplit=true). Pairing is greedy in input order; each rect is
+// consumed at most once via the used[] flags.
+//
+// Deprecated: kept for backward compatibility; detectScreenLayoutAreas is the
+// current layout-detection path.
+func groupAdjacentContours(rects []image.Rectangle) []Display {
+	var displays []Display
+	used := make([]bool, len(rects))
+
+	logMessage(LogFile, Debug, " Grouping %d candidate contours:", len(rects))
+	for i := 0; i < len(rects); i++ {
+		if used[i] {
+			continue
+		}
+
+		// Look for an adjacent rect with similar Y coordinate
+		var adjacent *int
+		for j := i + 1; j < len(rects); j++ {
+			if used[j] {
+				continue
+			}
+
+			// Check if Y coordinates are similar (within 20 pixels)
+			yDiff := rects[i].Min.Y - rects[j].Min.Y
+			if yDiff < 0 {
+				yDiff = -yDiff
+			}
+			if yDiff > 20 {
+				logMessage(LogFile, Debug, " Skip pairing rect[%d] and rect[%d]: Y diff=%d > 20", i, j, yDiff)
+				continue
+			}
+
+			// Check if they're side-by-side (allow small overlap or gap)
+			xGap := rects[j].Min.X - rects[i].Max.X
+			// Allow up to 50 pixels of overlap (negative gap) or up to 200 pixels of space
+			if xGap >= -50 && xGap < 200 {
+				logMessage(LogFile, Debug, " Pairing rect[%d] and rect[%d]: Y diff=%d, X gap=%d", i, j, yDiff, xGap)
+				// Taking &j is safe only because we break immediately and j is
+				// not advanced before *adjacent is read below.
+				adjacent = &j
+				break
+			} else {
+				logMessage(LogFile, Debug, " Skip pairing rect[%d] and rect[%d]: X gap=%d not in range [-50, 200)", i, j, xGap)
+			}
+		}
+
+		if adjacent != nil {
+			// Two separate contours - 
use them as left and right + used[i] = true + used[*adjacent] = true + + left := rects[i] + right := rects[*adjacent] + + // Ensure left is actually on the left + if left.Min.X > right.Min.X { + left, right = right, left + } + + logMessage(LogFile, Debug, " DUAL: [%dx%d @(%d,%d)] + [%dx%d @(%d,%d)]", + left.Dx(), left.Dy(), left.Min.X, left.Min.Y, + right.Dx(), right.Dy(), right.Min.X, right.Min.Y) + + displays = append(displays, Display{ + IsSplit: false, + LeftRect: left, + RightRect: right, + }) + } else { + // Single contour - will be split in half + used[i] = true + + logMessage(LogFile, Debug, " SPLIT: [%dx%d @(%d,%d)]", + rects[i].Dx(), rects[i].Dy(), rects[i].Min.X, rects[i].Min.Y) + + displays = append(displays, Display{ + IsSplit: true, + FullRect: rects[i], + }) + } + } + + return displays +} + +func detectScreenLayout(rotated gocv.Mat) ([]Display, error) { + gray := gocv.NewMat() + gocv.CvtColor(rotated, &gray, gocv.ColorBGRToGray) + thresh := gocv.NewMat() + gocv.Threshold(gray, &thresh, 170, 255, gocv.ThresholdBinary) + gray.Close() + contours := gocv.FindContours(thresh, gocv.RetrievalExternal, gocv.ChainApproxSimple) + thresh.Close() + + var rects []image.Rectangle + for i := 0; i < contours.Size(); i++ { + rect := gocv.BoundingRect(contours.At(i)) + if rect.Dy() > MIN_BOX_HEIGHT && rect.Dy() < 200 { + rects = append(rects, rect) + } + } + contours.Close() + + sort.Slice(rects, func(i, j int) bool { + return rects[i].Dy() > rects[j].Dy() + }) + + if len(rects) < 3 { + return nil, fmt.Errorf("found only %d contours (need 3+)", len(rects)) + } + + // Take up to 6 candidates to ensure we capture split digits + maxCands := 6 + if len(rects) < maxCands+1 { + maxCands = len(rects) - 1 + } + cands := rects[1 : 1+maxCands] + + // Sort by Y position for processing (Y desc, then X asc) + sort.Slice(cands, func(i, j int) bool { + if cands[i].Min.Y != cands[j].Min.Y { + return cands[i].Min.Y > cands[j].Min.Y + } + return cands[i].Min.X < cands[j].Min.X + }) + + 
logMessage(LogFile, Debug, " Selected %d candidates (excluding largest):", len(cands)) + for idx, rect := range cands { + logMessage(LogFile, Debug, " [%d]: %dx%d @(%d,%d)", idx, rect.Dx(), rect.Dy(), rect.Min.X, rect.Min.Y) + } + + // Group adjacent contours + displays := groupAdjacentContours(cands) + + logMessage(LogFile, Debug, " Found %d displays after grouping", len(displays)) + + // Group displays by Y position to find the digit row + // Displays within 30 pixels vertically are considered same row + type YGroup struct { + avgY int + displays []Display + } + var yGroups []YGroup + + for _, disp := range displays { + dispY := disp.FullRect.Min.Y + if !disp.IsSplit { + dispY = disp.LeftRect.Min.Y + } + + // Find existing group with similar Y + found := false + for i := range yGroups { + if math.Abs(float64(dispY-yGroups[i].avgY)) < 30 { + yGroups[i].displays = append(yGroups[i].displays, disp) + found = true + break + } + } + if !found { + yGroups = append(yGroups, YGroup{avgY: dispY, displays: []Display{disp}}) + } + } + + logMessage(LogFile, Debug, " Grouped into %d Y-bands:", len(yGroups)) + for idx, group := range yGroups { + logMessage(LogFile, Debug, " Band %d (Y~%d): %d displays", idx, group.avgY, len(group.displays)) + } + + // Find the band with exactly 2 displays (SpO2 and HR) + var digitDisplays []Display + for _, group := range yGroups { + if len(group.displays) == 2 { + digitDisplays = group.displays + break + } + } + + if len(digitDisplays) != 2 { + return nil, fmt.Errorf("could not find Y-band with exactly 2 displays (found %d bands)", len(yGroups)) + } + + // Sort by X position: left = SpO2, right = HR + sort.Slice(digitDisplays, func(i, j int) bool { + xI := digitDisplays[i].FullRect.Min.X + if !digitDisplays[i].IsSplit { + xI = digitDisplays[i].LeftRect.Min.X + } + xJ := digitDisplays[j].FullRect.Min.X + if !digitDisplays[j].IsSplit { + xJ = digitDisplays[j].LeftRect.Min.X + } + return xI < xJ + }) + + return digitDisplays, nil +} + +func 
saveLayoutVisualization(rotated gocv.Mat, layout *ScreenLayout, filename string) { + visualization := rotated.Clone() + + // Draw SpO2 box in red + red := color.RGBA{255, 0, 0, 255} + gocv.Rectangle(&visualization, layout.SpO2Area, red, 3) + gocv.PutText(&visualization, "SpO2", image.Pt(layout.SpO2Area.Min.X, layout.SpO2Area.Min.Y-10), + gocv.FontHersheyDuplex, 1.2, red, 2) + + // Draw HR box in cyan + cyan := color.RGBA{0, 255, 255, 255} + gocv.Rectangle(&visualization, layout.HRArea, cyan, 3) + gocv.PutText(&visualization, "HR", image.Pt(layout.HRArea.Min.X, layout.HRArea.Min.Y-10), + gocv.FontHersheyDuplex, 1.2, cyan, 2) + + gocv.IMWrite(filename, visualization) + visualization.Close() +} diff --git a/backups/backup_20251030_043342/normalize.go b/backups/backup_20251030_043342/normalize.go new file mode 100644 index 0000000..570a9e1 --- /dev/null +++ b/backups/backup_20251030_043342/normalize.go @@ -0,0 +1,167 @@ +package main + +import ( + "fmt" + "image" + "image/color" + + "gocv.io/x/gocv" +) + +// ContourInfo tracks a detected contour and whether it was used +type ContourInfo struct { + rect image.Rectangle + used bool +} + +// detectScreenWidth finds the screen width via contours (returns 0 on failure) +// If debugPath is not empty, saves an annotated image showing the detection +func detectScreenWidth(frame gocv.Mat, debugPath string) int { + gray := gocv.NewMat() + defer gray.Close() + gocv.CvtColor(frame, &gray, gocv.ColorBGRToGray) + + thresh := gocv.NewMat() + defer thresh.Close() + gocv.Threshold(gray, &thresh, 170, 255, gocv.ThresholdBinary) + + contours := gocv.FindContours(thresh, gocv.RetrievalExternal, gocv.ChainApproxSimple) + defer contours.Close() + + if contours.Size() == 0 { + return 0 + } + + // Collect all contours and track which are used + var allContours []ContourInfo + + screenMinX := 999999 + screenMaxX := 0 + for i := 0; i < contours.Size(); i++ { + rect := gocv.BoundingRect(contours.At(i)) + used := rect.Dx() > 50 || rect.Dy() > 50 
+ allContours = append(allContours, ContourInfo{rect: rect, used: used}) + + if used { + if rect.Min.X < screenMinX { + screenMinX = rect.Min.X + } + if rect.Max.X > screenMaxX { + screenMaxX = rect.Max.X + } + } + } + + screenWidth := screenMaxX - screenMinX + if screenWidth < 100 { + return 0 + } + + // Save debug image if requested + if debugPath != "" { + saveScreenWidthDebug(frame, allContours, screenMinX, screenMaxX, screenWidth, debugPath) + } + + return screenWidth +} + +// saveScreenWidthDebug creates an annotated image showing screen width detection +func saveScreenWidthDebug(frame gocv.Mat, contours []ContourInfo, minX, maxX, width int, path string) { + debug := frame.Clone() + defer debug.Close() + + // Draw all contours + for i, c := range contours { + if c.used { + // Used contours in green + green := color.RGBA{0, 255, 0, 255} + gocv.Rectangle(&debug, c.rect, green, 2) + gocv.PutText(&debug, fmt.Sprintf("#%d", i), + image.Pt(c.rect.Min.X, c.rect.Min.Y-5), + gocv.FontHersheyPlain, 1.0, green, 1) + } else { + // Unused contours in gray + gray := color.RGBA{100, 100, 100, 255} + gocv.Rectangle(&debug, c.rect, gray, 1) + } + } + + // Draw vertical lines at min and max X + red := color.RGBA{255, 0, 0, 255} + gocv.Line(&debug, image.Pt(minX, 0), image.Pt(minX, debug.Rows()), red, 3) + gocv.Line(&debug, image.Pt(maxX, 0), image.Pt(maxX, debug.Rows()), red, 3) + + // Add text annotations + yellow := color.RGBA{255, 255, 0, 255} + gocv.PutText(&debug, fmt.Sprintf("Screen MinX: %d", minX), + image.Pt(minX+10, 30), gocv.FontHersheyDuplex, 0.8, yellow, 2) + gocv.PutText(&debug, fmt.Sprintf("Screen MaxX: %d", maxX), + image.Pt(maxX-200, 30), gocv.FontHersheyDuplex, 0.8, yellow, 2) + gocv.PutText(&debug, fmt.Sprintf("Screen Width: %dpx", width), + image.Pt(minX+10, 60), gocv.FontHersheyDuplex, 0.8, yellow, 2) + gocv.PutText(&debug, fmt.Sprintf("Total contours: %d", len(contours)), + image.Pt(minX+10, 90), gocv.FontHersheyDuplex, 0.8, yellow, 2) + + 
gocv.IMWrite(path, debug) + fmt.Printf(" Saved width detection debug: %s\n", path) +} + +// applyScale resizes frame by the given scale factor +func applyScale(frame gocv.Mat, scale float64) gocv.Mat { + if scale == 1.0 { + return frame // No scaling needed + } + + newWidth := int(float64(frame.Cols()) * scale) + newHeight := int(float64(frame.Rows()) * scale) + + scaled := gocv.NewMat() + gocv.Resize(frame, &scaled, image.Pt(newWidth, newHeight), 0, 0, gocv.InterpolationLinear) + + return scaled +} + +// normalizeToWidth resizes frame to make it targetWidth pixels wide +func normalizeToWidth(frame gocv.Mat, targetWidth int) gocv.Mat { + if frame.Cols() == targetWidth { + return frame // Already correct size + } + + scale := float64(targetWidth) / float64(frame.Cols()) + newWidth := targetWidth + newHeight := int(float64(frame.Rows()) * scale) + + normalized := gocv.NewMat() + gocv.Resize(frame, &normalized, image.Pt(newWidth, newHeight), 0, 0, gocv.InterpolationLinear) + + return normalized +} + +// detectLayoutWithNormalization performs full layout detection with screen width normalization +// Returns: layout, scale factor, error +func detectLayoutWithNormalization(frame gocv.Mat) (*ScreenLayout, float64, error) { + // 1. Detect screen width + screenWidth := detectScreenWidth(frame, "") + if screenWidth == 0 { + return nil, 0, fmt.Errorf("failed to detect screen width") + } + + // 2. Calculate and apply scale + const targetWidth = 860 + scale := float64(targetWidth) / float64(screenWidth) + normalized := applyScale(frame, scale) + if normalized.Ptr() != frame.Ptr() { + defer normalized.Close() + } + + // 3. 
Detect layout on normalized frame + layout, rescaled, err := detectScreenLayoutAreas(normalized) + if !rescaled.Empty() { + rescaled.Close() + } + if err != nil { + return nil, 0, err + } + + return layout, scale, nil +} diff --git a/backups/backup_20251030_043342/ocr.go b/backups/backup_20251030_043342/ocr.go new file mode 100644 index 0000000..0bda3f2 --- /dev/null +++ b/backups/backup_20251030_043342/ocr.go @@ -0,0 +1,751 @@ +package main + +import ( + "fmt" + "image" + "image/color" + "io" + "os" + "strings" + "time" + + "gocv.io/x/gocv" +) + +func loadTemplates() (map[int][]gocv.Mat, error) { + templates := make(map[int][]gocv.Mat) + files, err := os.ReadDir("training_digits") + if err != nil { + logMessage(Console, Warning, "āš ļø WARNING: training_digits directory not found or cannot be read: %v", err) + logMessage(Console, Warning, "āš ļø Starting with empty template set. Recognition will not work until templates are added.") + logMessage(Console, Info, "") + return templates, nil + } + templateCount := 0 + for _, file := range files { + if !strings.HasSuffix(file.Name(), ".png") { + continue + } + name := strings.TrimSuffix(file.Name(), ".png") + parts := strings.Split(name, "_") + var digit int + _, err := fmt.Sscanf(parts[0], "%d", &digit) + if err != nil || digit < 0 || digit > 9 { + continue + } + filename := fmt.Sprintf("training_digits/%s", file.Name()) + template := gocv.IMRead(filename, gocv.IMReadGrayScale) + if template.Empty() { + logMessage(Console, Warning, "Warning: Failed to load %s", filename) + continue + } + templates[digit] = append(templates[digit], template) + templateCount++ + } + + // Load invalid digit markers (use index -1 for invalid patterns) + invalidFiles, err := os.ReadDir("training_digits/invalid") + if err == nil { + invalidCount := 0 + for _, file := range invalidFiles { + if !strings.HasSuffix(file.Name(), ".png") { + continue + } + filename := fmt.Sprintf("training_digits/invalid/%s", file.Name()) + template := 
gocv.IMRead(filename, gocv.IMReadGrayScale) + if template.Empty() { + logMessage(Console, Warning, "Warning: Failed to load invalid template %s", filename) + continue + } + templates[-1] = append(templates[-1], template) + invalidCount++ + } + if invalidCount > 0 { + logMessage(Console, Info, "āœ“ Loaded %d invalid digit markers", invalidCount) + } + } + + for digit := 0; digit <= 9; digit++ { + if len(templates[digit]) == 0 { + logMessage(Console, Warning, "āš ļø WARNING: No templates found for digit %d", digit) + } + } + logMessage(Console, Info, "āœ“ Loaded %d digit templates (0-9)", templateCount) + return templates, nil +} + +func matchDigit(digitImg gocv.Mat, templates map[int][]gocv.Mat) (int, float64) { + bestDigit := -1 + bestScore := 0.0 + bestInvalidScore := 0.0 + + // Check invalid patterns first (stored at index -1) + invalidTemplates := templates[-1] + for _, template := range invalidTemplates { + compareImg := digitImg + if digitImg.Rows() != template.Rows() || digitImg.Cols() != template.Cols() { + resized := gocv.NewMat() + gocv.Resize(digitImg, &resized, image.Pt(template.Cols(), template.Rows()), 0, 0, gocv.InterpolationLinear) + defer resized.Close() + compareImg = resized + } + diff := gocv.NewMat() + gocv.AbsDiff(compareImg, template, &diff) + totalPixels := compareImg.Rows() * compareImg.Cols() + diffPixels := gocv.CountNonZero(diff) + samePixels := totalPixels - diffPixels + score := (float64(samePixels) / float64(totalPixels)) * 100.0 + diff.Close() + if score > bestInvalidScore { + bestInvalidScore = score + } + } + + // Check regular digits (0-9) + for digit := 0; digit <= 9; digit++ { + digitTemplates := templates[digit] + if len(digitTemplates) == 0 { + // No templates for this digit, skip + continue + } + for _, template := range digitTemplates { + compareImg := digitImg + if digitImg.Rows() != template.Rows() || digitImg.Cols() != template.Cols() { + resized := gocv.NewMat() + gocv.Resize(digitImg, &resized, image.Pt(template.Cols(), 
template.Rows()), 0, 0, gocv.InterpolationLinear) + defer resized.Close() + compareImg = resized + } + diff := gocv.NewMat() + gocv.AbsDiff(compareImg, template, &diff) + totalPixels := compareImg.Rows() * compareImg.Cols() + diffPixels := gocv.CountNonZero(diff) + samePixels := totalPixels - diffPixels + score := (float64(samePixels) / float64(totalPixels)) * 100.0 + diff.Close() + if score > bestScore { + bestScore = score + bestDigit = digit + } + } + } + + // If invalid pattern matches better than any digit (and score > 70%), return -1 + if bestInvalidScore > bestScore && bestInvalidScore > 70 { + return -1, bestInvalidScore + } + + return bestDigit, bestScore +} + +// Helper function: check if there's a '1' digit by cutting at the expected width and matching templates +// ONLY checks against '1' templates to prevent false matches with '4', '7', etc. +func hasOneAt(thresh gocv.Mat, x int, templates map[int][]gocv.Mat, logger io.Writer) bool { + h := thresh.Rows() + w := thresh.Cols() + + // Calculate the region where a '1' would be if it exists + // '1' is 72px wide, so extract from (x - 72) to x + leftEdge := x - DIGIT_ONE_WIDTH + if leftEdge < 0 { + leftEdge = 0 + } + + if x > w { + x = w + } + + logMessage(Both, Debug, " hasOneAt(x=%d): extracting region [%d..%d] (width=%d) from display width=%d", + x, leftEdge, x, x-leftEdge, w) + + // Extract the potential '1' region + region := thresh.Region(image.Rect(leftEdge, 0, x, h)) + digitImg := region.Clone() + region.Close() + + // Match ONLY against '1' templates (don't check other digits) + oneTemplates := templates[1] + if len(oneTemplates) == 0 { + logMessage(LogFile, Warning, " hasOneAt(x=%d): No '1' templates loaded -> NO", x) + digitImg.Close() + return false + } + + bestScore := 0.0 + for _, template := range oneTemplates { + compareImg := digitImg + if digitImg.Rows() != template.Rows() || digitImg.Cols() != template.Cols() { + resized := gocv.NewMat() + gocv.Resize(digitImg, &resized, 
image.Pt(template.Cols(), template.Rows()), 0, 0, gocv.InterpolationLinear) + defer resized.Close() + compareImg = resized + } + diff := gocv.NewMat() + gocv.AbsDiff(compareImg, template, &diff) + totalPixels := compareImg.Rows() * compareImg.Cols() + diffPixels := gocv.CountNonZero(diff) + samePixels := totalPixels - diffPixels + score := (float64(samePixels) / float64(totalPixels)) * 100.0 + diff.Close() + if score > bestScore { + bestScore = score + } + } + digitImg.Close() + + // If it matches '1' with >85% confidence, we have a '1' + if bestScore > 85.0 { + logMessage(Both, Debug, " hasOneAt(x=%d): matched '1' with %.1f%% confidence -> YES", x, bestScore) + return true + } + + logMessage(Both, Debug, " hasOneAt(x=%d): best '1' match %.1f%% confidence -> NO", x, bestScore) + return false +} + +// validateEight checks if a digit image has the two characteristic holes of an '8' +// by checking for black pixels along horizontal lines where holes should be +func validateEight(digitImg gocv.Mat, logger io.Writer) bool { + h := digitImg.Rows() + w := digitImg.Cols() + + // Upper hole: 30% of height + // Lower hole: 70% of height + // Horizontal range: 40%-50% of width + topHoleY := (h * 30) / 100 + bottomHoleY := (h * 70) / 100 + xStart := (w * 40) / 100 + xEnd := (w * 50) / 100 + + // Check for ANY black pixel in the horizontal range + // In thresholded image: black (0) = hole, white (255) = digit + topHoleFound := false + for x := xStart; x <= xEnd; x++ { + pixel := digitImg.GetUCharAt(topHoleY, x) + if pixel < 128 { + topHoleFound = true + break + } + } + + bottomHoleFound := false + for x := xStart; x <= xEnd; x++ { + pixel := digitImg.GetUCharAt(bottomHoleY, x) + if pixel < 128 { + bottomHoleFound = true + break + } + } + + logMessage(LogFile, Debug, " validateEight: top hole @ y=%d (30%%) x=%d-%d (%v), bottom hole @ y=%d (70%%) x=%d-%d (%v)", + topHoleY, xStart, xEnd, topHoleFound, + bottomHoleY, xStart, xEnd, bottomHoleFound) + + return topHoleFound && 
bottomHoleFound +} + +// validateZero checks if a digit image has the characteristic hole of a '0' +// by checking for black pixels along a horizontal line at the center +func validateZero(digitImg gocv.Mat, logger io.Writer) bool { + h := digitImg.Rows() + w := digitImg.Cols() + + // Center hole: 50% of height + // Horizontal range: 30%-70% of width (wider range than '8' since '0' hole is larger) + centerY := h / 2 + xStart := (w * 30) / 100 + xEnd := (w * 70) / 100 + + // Check for ANY black pixel in the horizontal range + // In thresholded image: black (0) = hole, white (255) = digit + holeFound := false + for x := xStart; x <= xEnd; x++ { + pixel := digitImg.GetUCharAt(centerY, x) + if pixel < 128 { + holeFound = true + break + } + } + + logMessage(LogFile, Debug, " validateZero: center hole @ y=%d (50%%) x=%d-%d (%v)", + centerY, xStart, xEnd, holeFound) + + return holeFound +} + +// decolorizeAlarmMode converts colored alarm backgrounds (yellow/orange, cyan/blue) to black +// while preserving white digits, so normal OCR thresholding works correctly +func decolorizeAlarmMode(displayImg gocv.Mat) gocv.Mat { + // Create output image + decolorized := gocv.NewMatWithSize(displayImg.Rows(), displayImg.Cols(), displayImg.Type()) + + // Process each pixel + for y := 0; y < displayImg.Rows(); y++ { + for x := 0; x < displayImg.Cols(); x++ { + // Get BGR values + b := displayImg.GetUCharAt(y, x*3) // Blue + g := displayImg.GetUCharAt(y, x*3+1) // Green + r := displayImg.GetUCharAt(y, x*3+2) // Red + + // If pixel is "white-ish" (all channels > 240), keep it white + // Otherwise (colored background, borders, etc.), set to black + if r > 240 && g > 240 && b > 240 { + // White digit - keep as white + decolorized.SetUCharAt(y, x*3, 255) + decolorized.SetUCharAt(y, x*3+1, 255) + decolorized.SetUCharAt(y, x*3+2, 255) + } else { + // Colored background or dark area - set to black + decolorized.SetUCharAt(y, x*3, 0) + decolorized.SetUCharAt(y, x*3+1, 0) + 
// recognizeDisplayArea OCRs one display (SpO2 or HR) from the rotated,
// already-thresholded frame. `area` is a fixed-width rectangle around the
// digits and `templates` maps digit -> template images (key -1 = invalid
// patterns).
//
// Strategy: digits are cut right-to-left using fixed widths. hasOneAt probes
// whether each position holds a narrow '1' (DIGIT_ONE_WIDTH) or a regular
// digit (DIGIT_NON_ONE_WIDTH), accumulating cut positions in D[1..3]. The
// three slices are template-matched, '8'/'0' results are sanity-checked via
// hole detection, and the value is assembled as 1XX (3-digit) or XX
// (2-digit) depending on whether digit 1 looks like a '1'.
//
// Returns: (assembled number, digit2 value, digit2 score, digit3 value,
// digit3 score). Digit 1 is only ever treated as a leading '1'.
func recognizeDisplayArea(rotated gocv.Mat, area image.Rectangle, templates map[int][]gocv.Mat, displayName string, frameCount int, logger io.Writer) (int, int, float64, int, float64) {
	startTime := time.Now()
	cleanName := strings.TrimSpace(strings.ToLower(displayName))

	// Extract the display area (ALREADY THRESHOLDED BINARY IMAGE)
	region := rotated.Region(area)
	thresh := region.Clone()
	region.Close()

	// DEBUG: Save extracted region - already thresholded
	if DEBUG_MODE {
		step1Filename := fmt.Sprintf("review/f%d_%s_step1_extracted.png", frameCount, cleanName)
		gocv.IMWrite(step1Filename, thresh)
	}

	w := thresh.Cols()
	h := thresh.Rows()

	// Log display dimensions
	logMessage(LogFile, Debug, "    %s display: w=%d, h=%d", displayName, w, h)

	// Sequential check for '1' patterns to determine digit widths
	// hasOneAt checks if column X is white and column X+10 is black (detects RIGHT EDGE where '1' ends)

	// Digit detection (right to left: digit3, digit2, digit1)
	// D stores cut positions, digitIsOne stores detection results
	D := make([]int, 4)           // indices 1,2,3 (0 unused)
	digitIsOne := make([]bool, 4) // indices 1,2,3 (0 unused)

	totalWidth := 0

	logMessage(Both, Debug, "    [%s] Display width=%d, starting digit detection...", displayName, w)

	detectionStart := time.Now()
	for i := 3; i >= 1; i-- {
		var checkPos int
		if i == 3 {
			checkPos = w // Check at right edge, not w-8
		} else {
			// -5 backs off slightly from the previous cut to probe inside
			// the next digit rather than on the boundary itself.
			checkPos = w - totalWidth - 5
		}

		logMessage(Both, Debug, "    [%s] Digit %d: checkPos=%d (w=%d, totalWidth=%d)", displayName, i, checkPos, w, totalWidth)
		digitIsOne[i] = hasOneAt(thresh, checkPos, templates, logger)

		// Add width to accumulator
		var widthUsed int
		if digitIsOne[i] {
			widthUsed = DIGIT_ONE_WIDTH
			totalWidth += DIGIT_ONE_WIDTH
			logMessage(Both, Debug, "    [%s] Digit %d: IS ONE -> using width=%d (totalWidth now %d)", displayName, i, widthUsed, totalWidth)
		} else {
			widthUsed = DIGIT_NON_ONE_WIDTH
			totalWidth += DIGIT_NON_ONE_WIDTH
			logMessage(Both, Debug, "    [%s] Digit %d: NOT ONE -> using width=%d (totalWidth now %d)", displayName, i, widthUsed, totalWidth)
		}

		D[i] = w - totalWidth
		logMessage(Both, Debug, "    [%s] Digit %d: CUT position D[%d]=%d", displayName, i, i, D[i])
	}
	logMessage(LogFile, Debug, "    [TIMING][%s] Digit detection (hasOneAt x3): %dms", displayName, time.Since(detectionStart).Milliseconds())

	logMessage(Both, Debug, "    [%s] Final cut positions: D[1]=%d, D[2]=%d, D[3]=%d", displayName, D[1], D[2], D[3])

	// Create visualization image showing check positions AND cut positions
	visImg := gocv.NewMat()
	gocv.CvtColor(thresh, &visImg, gocv.ColorGrayToBGR)

	// Draw vertical lines at check positions with labels
	// NOTE: colors are BGR-ordered RGBA structs (gocv convention), so
	// {0, 0, 255} renders red and {0, 255, 255} renders yellow.
	greenColor := color.RGBA{0, 255, 0, 255}    // Green for '1' detected
	redColor := color.RGBA{0, 0, 255, 255}      // Red for no '1'
	yellowColor := color.RGBA{0, 255, 255, 255} // Yellow for CUT lines

	// Digit 3 check line
	digit3Color := redColor
	if digitIsOne[3] {
		digit3Color = greenColor
	}
	gocv.Line(&visImg, image.Pt(w-8, 0), image.Pt(w-8, h), digit3Color, 2)
	gocv.PutText(&visImg, "D3", image.Pt(w-8+5, 20), gocv.FontHersheyDuplex, 0.5, digit3Color, 1)

	// Digit 2 check line (calculate position from accumulated width)
	var digit2Width int
	if digitIsOne[3] {
		digit2Width = DIGIT_ONE_WIDTH
	} else {
		digit2Width = DIGIT_NON_ONE_WIDTH
	}
	digit2CheckPos := w - digit2Width - 5
	digit2Color := redColor
	if digitIsOne[2] {
		digit2Color = greenColor
	}
	gocv.Line(&visImg, image.Pt(digit2CheckPos, 0), image.Pt(digit2CheckPos, h), digit2Color, 2)
	gocv.PutText(&visImg, "D2", image.Pt(digit2CheckPos+5, 40), gocv.FontHersheyDuplex, 0.5, digit2Color, 1)

	// Digit 1 check line (calculate position from accumulated width)
	var digit3Width int
	if digitIsOne[3] {
		digit3Width = DIGIT_ONE_WIDTH
	} else {
		digit3Width = DIGIT_NON_ONE_WIDTH
	}
	var digit2WidthForD1 int
	if digitIsOne[2] {
		digit2WidthForD1 = DIGIT_ONE_WIDTH
	} else {
		digit2WidthForD1 = DIGIT_NON_ONE_WIDTH
	}
	digit1CheckPos := w - digit3Width - digit2WidthForD1 - 5
	digit1Color := redColor
	if digitIsOne[1] {
		digit1Color = greenColor
	}
	gocv.Line(&visImg, image.Pt(digit1CheckPos, 0), image.Pt(digit1CheckPos, h), digit1Color, 2)
	gocv.PutText(&visImg, "D1", image.Pt(digit1CheckPos+5, 60), gocv.FontHersheyDuplex, 0.5, digit1Color, 1)

	// Draw CUT lines in yellow showing where we actually split the digits
	gocv.Line(&visImg, image.Pt(D[2], 0), image.Pt(D[2], h), yellowColor, 3)
	gocv.PutText(&visImg, "CUT1", image.Pt(D[2]+5, h-10), gocv.FontHersheyDuplex, 0.6, yellowColor, 2)

	gocv.Line(&visImg, image.Pt(D[3], 0), image.Pt(D[3], h), yellowColor, 3)
	gocv.PutText(&visImg, "CUT2", image.Pt(D[3]+5, h-30), gocv.FontHersheyDuplex, 0.6, yellowColor, 2)

	// Save visualization
	if DEBUG_MODE {
		visFilename := fmt.Sprintf("review/f%d_%s_checks.png", frameCount, cleanName)
		gocv.IMWrite(visFilename, visImg)
	}
	visImg.Close()

	// Clamp cut positions to valid range
	if D[3] < 0 {
		D[3] = 0
	}
	if D[2] < 0 {
		D[2] = 0
	}

	// Extract three digit regions
	logMessage(Both, Debug, "    [%s] Extracting digit1: region [0..%d] (width=%d)", displayName, D[2], D[2])
	digit1Region := thresh.Region(image.Rect(0, 0, D[2], h))
	digit1 := digit1Region.Clone()
	digit1Region.Close()

	logMessage(Both, Debug, "    [%s] Extracting digit2: region [%d..%d] (width=%d)", displayName, D[2], D[3], D[3]-D[2])
	digit2Region := thresh.Region(image.Rect(D[2], 0, D[3], h))
	digit2 := digit2Region.Clone()
	digit2Region.Close()

	logMessage(Both, Debug, "    [%s] Extracting digit3: region [%d..%d] (width=%d)", displayName, D[3], w, w-D[3])
	digit3Region := thresh.Region(image.Rect(D[3], 0, w, h))
	digit3 := digit3Region.Clone()
	digit3Region.Close()

	// Match all three digits
	matchStart := time.Now()
	num1, score1 := matchDigit(digit1, templates)
	num2, score2 := matchDigit(digit2, templates)
	num3, score3 := matchDigit(digit3, templates)
	logMessage(LogFile, Debug, "    [TIMING][%s] matchDigit x3: %dms", displayName, time.Since(matchStart).Milliseconds())

	// Validate '8' digits - check for two holes
	if num1 == 8 && !validateEight(digit1, logger) {
		logMessage(LogFile, Debug, "    āš ļø Digit1 recognized as 8 but failed hole validation - marking invalid")
		num1 = -1
		score1 = 0.0
	}
	if num2 == 8 && !validateEight(digit2, logger) {
		logMessage(LogFile, Debug, "    āš ļø Digit2 recognized as 8 but failed hole validation - marking invalid")
		num2 = -1
		score2 = 0.0
	}
	if num3 == 8 && !validateEight(digit3, logger) {
		logMessage(LogFile, Debug, "    āš ļø Digit3 recognized as 8 but failed hole validation - marking invalid")
		num3 = -1
		score3 = 0.0
	}

	// Validate '0' digits - check for center hole
	if num1 == 0 && !validateZero(digit1, logger) {
		logMessage(LogFile, Debug, "    āš ļø Digit1 recognized as 0 but failed hole validation - marking invalid")
		num1 = -1
		score1 = 0.0
	}
	if num2 == 0 && !validateZero(digit2, logger) {
		logMessage(LogFile, Debug, "    āš ļø Digit2 recognized as 0 but failed hole validation - marking invalid")
		num2 = -1
		score2 = 0.0
	}
	if num3 == 0 && !validateZero(digit3, logger) {
		logMessage(LogFile, Debug, "    āš ļø Digit3 recognized as 0 but failed hole validation - marking invalid")
		num3 = -1
		score3 = 0.0
	}

	// Calculate final number
	// NOTE(review): if num2/num3 are -1 (invalid), the arithmetic below
	// still runs and produces a nonsensical value — presumably the caller
	// validates the result; confirm against processor.go.
	var number int
	if digitIsOne[1] {
		// 3-digit number: 1XX
		number = 100 + num2*10 + num3
	} else {
		// 2-digit number: XX (digit1 region is mostly empty/padding)
		number = num2*10 + num3
	}

	// Save individual digits
	if DEBUG_MODE {
		saveStart := time.Now()
		digit1Filename := fmt.Sprintf("review/f%d_%s_digit1.png", frameCount, cleanName)
		digit2Filename := fmt.Sprintf("review/f%d_%s_digit2.png", frameCount, cleanName)
		digit3Filename := fmt.Sprintf("review/f%d_%s_digit3.png", frameCount, cleanName)
		gocv.IMWrite(digit1Filename, digit1)
		gocv.IMWrite(digit2Filename, digit2)
		gocv.IMWrite(digit3Filename, digit3)
		logMessage(LogFile, Debug, "    [TIMING][%s] Save digit images: %dms", displayName, time.Since(saveStart).Milliseconds())
	}

	// Create labeled full image (combine all three digits)
	combinedWidth := digit1.Cols() + digit2.Cols() + digit3.Cols()
	combinedHeight := digit1.Rows()
	if digit2.Rows() > combinedHeight {
		combinedHeight = digit2.Rows()
	}
	if digit3.Rows() > combinedHeight {
		combinedHeight = digit3.Rows()
	}

	fullForLabel := gocv.NewMatWithSize(combinedHeight, combinedWidth, digit1.Type())

	// Copy digit1
	digit1ROI := fullForLabel.Region(image.Rect(0, 0, digit1.Cols(), digit1.Rows()))
	digit1.CopyTo(&digit1ROI)
	digit1ROI.Close()

	// Copy digit2
	digit2ROI := fullForLabel.Region(image.Rect(digit1.Cols(), 0, digit1.Cols()+digit2.Cols(), digit2.Rows()))
	digit2.CopyTo(&digit2ROI)
	digit2ROI.Close()

	// Copy digit3
	digit3ROI := fullForLabel.Region(image.Rect(digit1.Cols()+digit2.Cols(), 0, combinedWidth, digit3.Rows()))
	digit3.CopyTo(&digit3ROI)
	digit3ROI.Close()

	// Add padding at top for label
	labeledFull := gocv.NewMat()
	gocv.CopyMakeBorder(fullForLabel, &labeledFull, 60, 0, 0, 0, gocv.BorderConstant, color.RGBA{0, 0, 0, 255})
	fullForLabel.Close()

	// Draw label
	label := fmt.Sprintf("%d", number)
	textColor := color.RGBA{255, 255, 255, 255}
	gocv.PutText(&labeledFull, label, image.Pt(10, 45), gocv.FontHersheyDuplex, 1.8, textColor, 3)

	// Save labeled full image
	if DEBUG_MODE {
		fullFilename := fmt.Sprintf("review/f%d_%s_full.png", frameCount, cleanName)
		gocv.IMWrite(fullFilename, labeledFull)
	}
	labeledFull.Close()

	digit1.Close()
	digit2.Close()
	digit3.Close()
	thresh.Close()

	// Calculate widths for logging (reuses the visualization variables)
	digit3Width = DIGIT_ONE_WIDTH
	if !digitIsOne[3] {
		digit3Width = DIGIT_NON_ONE_WIDTH
	}
	digit2Width = DIGIT_ONE_WIDTH
	if !digitIsOne[2] {
		digit2Width = DIGIT_NON_ONE_WIDTH
	}

	// Print debug info to log only
	if digitIsOne[1] {
		logMessage(LogFile, Info, "    %s: 1%d%d [3-DIGIT: d1=1@%dpx (x=0-%d), d2=%s@%dpx (x=%d-%d), d3=%s@%dpx (x=%d-%d)] (scores: %d=%.0f%%, %d=%.0f%%, %d=%.0f%%)",
			displayName, num2, num3, D[2], D[2],
			map[bool]string{true: "1", false: "X"}[digitIsOne[2]], digit2Width, D[2], D[3],
			map[bool]string{true: "1", false: "X"}[digitIsOne[3]], digit3Width, D[3], w,
			num1, score1, num2, score2, num3, score3)
	} else {
		logMessage(LogFile, Info, "    %s: %d%d [2-DIGIT: d2=%s@%dpx (x=%d-%d), d3=%s@%dpx (x=%d-%d)] (scores: %d=%.0f%%, %d=%.0f%%)",
			displayName, num2, num3,
			map[bool]string{true: "1", false: "X"}[digitIsOne[2]], digit2Width, D[2], D[3],
			map[bool]string{true: "1", false: "X"}[digitIsOne[3]], digit3Width, D[3], w,
			num2, score2, num3, score3)
	}

	logMessage(LogFile, Debug, "    [TIMING][%s] Total recognizeDisplayArea: %dms", displayName, time.Since(startTime).Milliseconds())

	// Return using middle two digits for compatibility with existing code
	return number, num2, score2, num3, score3
}
(significantly wider) + aspectRatio := float64(w) / float64(h) + if aspectRatio > 1.55 { // 3-digit displays are much wider + // 3-digit display: assume first digit is 1, crop it out + isThreeDigit = true + // Cut off left ~8% (the narrow "1" digit) + cutPoint := w / 12 // Approximately 8.3% + croppedRegion := thresh.Region(image.Rect(cutPoint, 0, w, h)) + cropped := croppedRegion.Clone() + croppedRegion.Close() + thresh.Close() + // Now split the remaining 2 digits + w2 := cropped.Cols() + h2 := cropped.Rows() + mid := w2 / 2 + leftRegion := cropped.Region(image.Rect(0, 0, mid, h2)) + leftDigit = leftRegion.Clone() + leftRegion.Close() + rightRegion := cropped.Region(image.Rect(mid, 0, w2, h2)) + rightDigit = rightRegion.Clone() + rightRegion.Close() + cropped.Close() + } else { + // Normal 2-digit display + isThreeDigit = false + mid := w / 2 + leftRegion := thresh.Region(image.Rect(0, 0, mid, h)) + leftDigit = leftRegion.Clone() + leftRegion.Close() + rightRegion := thresh.Region(image.Rect(mid, 0, w, h)) + rightDigit = rightRegion.Clone() + rightRegion.Close() + thresh.Close() + } + } else { + // Two separate contours - use directly + leftRegion := rotated.Region(display.LeftRect) + leftImg := leftRegion.Clone() + leftRegion.Close() + leftGray := gocv.NewMat() + gocv.CvtColor(leftImg, &leftGray, gocv.ColorBGRToGray) + leftImg.Close() + leftDigit = gocv.NewMat() + gocv.Threshold(leftGray, &leftDigit, 128, 255, gocv.ThresholdBinary) + leftGray.Close() + + rightRegion := rotated.Region(display.RightRect) + rightImg := rightRegion.Clone() + rightRegion.Close() + rightGray := gocv.NewMat() + gocv.CvtColor(rightImg, &rightGray, gocv.ColorBGRToGray) + rightImg.Close() + rightDigit = gocv.NewMat() + gocv.Threshold(rightGray, &rightDigit, 128, 255, gocv.ThresholdBinary) + rightGray.Close() + } + + leftFilename := fmt.Sprintf("review/f%d_%s_left.png", frameCount, cleanName) + rightFilename := fmt.Sprintf("review/f%d_%s_right.png", frameCount, cleanName) + 
gocv.IMWrite(leftFilename, leftDigit) + gocv.IMWrite(rightFilename, rightDigit) + + leftNum, leftScore := matchDigit(leftDigit, templates) + rightNum, rightScore := matchDigit(rightDigit, templates) + + var number int + if isThreeDigit { + number = 100 + leftNum*10 + rightNum + } else { + number = leftNum*10 + rightNum + } + + // Create labeled full image with recognized value at top + var fullForLabel gocv.Mat + if display.IsSplit { + // Recreate thresh for SPLIT mode + region := rotated.Region(display.FullRect) + displayImg := region.Clone() + region.Close() + gray := gocv.NewMat() + gocv.CvtColor(displayImg, &gray, gocv.ColorBGRToGray) + displayImg.Close() + fullForLabel = gocv.NewMat() + gocv.Threshold(gray, &fullForLabel, 128, 255, gocv.ThresholdBinary) + gray.Close() + } else { + // Combine left and right for DUAL mode + combinedWidth := leftDigit.Cols() + rightDigit.Cols() + combinedHeight := leftDigit.Rows() + if rightDigit.Rows() > combinedHeight { + combinedHeight = rightDigit.Rows() + } + fullForLabel = gocv.NewMatWithSize(combinedHeight, combinedWidth, leftDigit.Type()) + leftROI := fullForLabel.Region(image.Rect(0, 0, leftDigit.Cols(), leftDigit.Rows())) + leftDigit.CopyTo(&leftROI) + leftROI.Close() + rightROI := fullForLabel.Region(image.Rect(leftDigit.Cols(), 0, combinedWidth, rightDigit.Rows())) + rightDigit.CopyTo(&rightROI) + rightROI.Close() + } + + // Add padding at top for label + labeledFull := gocv.NewMat() + gocv.CopyMakeBorder(fullForLabel, &labeledFull, 60, 0, 0, 0, gocv.BorderConstant, color.RGBA{0, 0, 0, 255}) + fullForLabel.Close() + + // Draw label with larger font + label := fmt.Sprintf("%d", number) + textColor := color.RGBA{255, 255, 255, 255} + gocv.PutText(&labeledFull, label, image.Pt(10, 45), gocv.FontHersheyDuplex, 1.8, textColor, 3) + + // Save labeled full image + fullFilename := fmt.Sprintf("review/f%d_%s_full.png", frameCount, cleanName) + gocv.IMWrite(fullFilename, labeledFull) + labeledFull.Close() + leftDigit.Close() + 
rightDigit.Close() + + // Note: This old function is kept for backward compatibility but should use logger if needed + // For now, keeping console output since this isn't actively used + return number, leftNum, leftScore, rightNum, rightScore +} diff --git a/backups/backup_20251030_043342/pulse-monitor.go b/backups/backup_20251030_043342/pulse-monitor.go new file mode 100644 index 0000000..666cfad --- /dev/null +++ b/backups/backup_20251030_043342/pulse-monitor.go @@ -0,0 +1,930 @@ +package main + +import ( + "fmt" + "image" + "io" + "log" + "os" + "os/signal" + "syscall" + "time" + + "gocv.io/x/gocv" +) + +const VERSION = "v2.35" + +// Display and digit measurement constants +const ( + CUT_WIDTH = 280 // Width of cropped digit area + DIGIT_ONE_WIDTH = 72 // Width of narrow '1' digit + DIGIT_NON_ONE_WIDTH = 100 // Width of regular digits (0,2-9) + MIN_BOX_HEIGHT = 110 // Minimum height for valid digit contours +) + +func main() { + // Check if running in single-frame test mode + if len(os.Args) >= 2 { + framePath := os.Args[1] + runSingleFrameMode(framePath) + return + } + + // Normal streaming mode + runStreamingMode() +} + +func runSingleFrameMode(framePath string) { + fmt.Printf("=== Single Frame Test Mode ===\n") + fmt.Printf("Loading frame: %s\n\n", framePath) + + // Load the frame + rotated := gocv.IMRead(framePath, gocv.IMReadColor) + if rotated.Empty() { + fmt.Printf("āŒ Failed to load frame: %s\n", framePath) + return + } + defer rotated.Close() + + fmt.Printf("āœ“ Frame loaded: %dx%d\n", rotated.Cols(), rotated.Rows()) + + // Detect and normalize screen width + screenWidth := detectScreenWidth(rotated, "test_output/screen_width_detection.jpg") + if screenWidth == 0 { + fmt.Println("āŒ Failed to detect screen width") + return + } + fmt.Printf("āœ“ Detected screen width: %dpx\n", screenWidth) + + targetWidth := 860 + normalized := normalizeToWidth(rotated, targetWidth) + if normalized.Ptr() != rotated.Ptr() { + defer normalized.Close() + fmt.Printf("āœ“ 
Normalized to %dpx\n", targetWidth) + } + frameToUse := normalized + fmt.Println() + + // Load templates + fmt.Println("Loading templates...") + templates, err := loadTemplates() + if err != nil { + fmt.Printf("āŒ Error loading templates: %v\n", err) + return + } + defer func() { + for _, templateList := range templates { + for _, t := range templateList { + t.Close() + } + } + }() + fmt.Println("āœ“ Templates loaded\n") + + // Create output directory + os.MkdirAll("test_output", 0755) + + // Try layout detection + fmt.Println("Attempting layout detection...") + screenLayout, rescaled, err := detectScreenLayoutAreas(frameToUse) + if err != nil { + fmt.Printf("āŒ Layout detection failed: %v\n", err) + return + } + if !rescaled.Empty() { + rescaled.Close() + } + + fmt.Println("āœ“ Layout detected successfully") + fmt.Printf(" SpO2 area: X=%d-%d, Y=%d-%d, Size=%dx%d\n", + screenLayout.SpO2Area.Min.X, screenLayout.SpO2Area.Max.X, + screenLayout.SpO2Area.Min.Y, screenLayout.SpO2Area.Max.Y, + screenLayout.SpO2Area.Dx(), screenLayout.SpO2Area.Dy()) + fmt.Printf(" HR area: X=%d-%d, Y=%d-%d, Size=%dx%d\n\n", + screenLayout.HRArea.Min.X, screenLayout.HRArea.Max.X, + screenLayout.HRArea.Min.Y, screenLayout.HRArea.Max.Y, + screenLayout.HRArea.Dx(), screenLayout.HRArea.Dy()) + + // Save layout visualization + saveLayoutVisualization(frameToUse, screenLayout, "test_output/layout_boxes.jpg") + fmt.Println("āœ“ Saved: test_output/layout_boxes.jpg") + + // Run OCR on both displays + fmt.Println("\nRunning OCR...") + spo2Val, spo2Left, spo2LeftConf, spo2Right, spo2RightConf := recognizeDisplayArea(frameToUse, screenLayout.SpO2Area, templates, "SpO2", 0, nil) + hrVal, hrLeft, hrLeftConf, hrRight, hrRightConf := recognizeDisplayArea(frameToUse, screenLayout.HRArea, templates, "HR", 0, nil) + + spo2AvgConf := (spo2LeftConf + spo2RightConf) / 2.0 + hrAvgConf := (hrLeftConf + hrRightConf) / 2.0 + + fmt.Printf("\n=== RESULTS ===\n") + fmt.Printf("SpO2: %d%% (left=%d @%.1f%%, right=%d 
@%.1f%%, avg=%.1f%%)\n", + spo2Val, spo2Left, spo2LeftConf, spo2Right, spo2RightConf, spo2AvgConf) + fmt.Printf("HR: %d bpm (left=%d @%.1f%%, right=%d @%.1f%%, avg=%.1f%%)\n", + hrVal, hrLeft, hrLeftConf, hrRight, hrRightConf, hrAvgConf) + + if spo2Left == -1 || spo2Right == -1 || hrLeft == -1 || hrRight == -1 { + fmt.Println("\nāš ļø Invalid/corrupted digit detected") + } + + if spo2AvgConf < 85 || hrAvgConf < 85 { + fmt.Println("\nāš ļø Low confidence reading") + } + + // Check for output files + fmt.Println("\n=== OUTPUT FILES ===") + fmt.Println("Debug visualizations (in current directory):") + fmt.Println(" debug_01_original.png - Original frame") + fmt.Println(" debug_02_gray.png - Grayscale") + fmt.Println(" debug_03_threshold.png - Thresholded") + fmt.Println(" debug_04_all_boxes.png - All detected boxes") + fmt.Println(" debug_05_bounding_and_line.png - Bounding box & 50% line") + fmt.Println(" debug_06_center_boxes.png - Center boxes") + fmt.Println("\nTest outputs (in test_output/):") + fmt.Println(" test_output/layout_boxes.jpg - Layout visualization") + + // Check if review files were created + if _, err := os.Stat("review/f0_spo2_full.png"); err == nil { + fmt.Println(" review/f0_spo2_full.png - SpO2 recognition") + fmt.Println(" review/f0_hr_full.png - HR recognition") + fmt.Println(" review/f0_spo2_digit*.png - Individual SpO2 digits") + fmt.Println(" review/f0_hr_digit*.png - Individual HR digits") + } + + fmt.Println("\nāœ“ Single frame test complete") +} + +func runStreamingMode() { + // Create log file with timestamp + logFilename := fmt.Sprintf("pulse-monitor_%s.log", time.Now().Format("20060102_150405")) + logFile, err := os.OpenFile(logFilename, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) + if err != nil { + fmt.Printf("Warning: Could not create log file: %v\n", err) + fmt.Printf("Continuing without file logging...\n\n") + } else { + defer logFile.Close() + // Create multi-writer to write to both stdout and file + multiWriter := 
io.MultiWriter(os.Stdout, logFile) + log.SetOutput(multiWriter) + log.SetFlags(0) // Disable timestamp prefix since we use our own + log.Printf("šŸ“ Logging to: %s\n", logFilename) + } + + log.Printf("=== Pulse-Ox Template Matching OCR %s ===\n\n", VERSION) + os.RemoveAll("review") + os.MkdirAll("review", 0755) + os.MkdirAll("raw_frames", 0755) + os.MkdirAll("test_output", 0755) + log.Println("šŸ—‘ļø Cleaned review/ directory") + + config, err := LoadConfig("config.yaml") + if err != nil { + log.Printf("Error loading config: %v\n", err) + return + } + templates, err := loadTemplates() + if err != nil { + log.Printf("Error loading templates: %v\n", err) + return + } + defer func() { + for _, templateList := range templates { + for _, t := range templateList { + t.Close() + } + } + }() + log.Println("šŸ“Š All processed frames saved to review/") + log.Println(" Press Ctrl+C to stop and generate review.html\n") + log.Println("Connecting to RTSP stream...") + + var stream *gocv.VideoCapture + for { + stream, err = gocv.VideoCaptureFile(config.Camera.RTSPURL) + if err == nil { + break + } + log.Printf("Failed to connect: %v\n", err) + log.Println("Retrying in 5 seconds...") + time.Sleep(5 * time.Second) + } + defer stream.Close() + log.Println("Connected! 
Press Ctrl+C to stop") + log.Printf("Posting to HASS: %s\n\n", config.HomeAssistant.URL) + + // Detect screen width and layout once at startup + log.Println("Detecting screen width and layout...") + var screenLayout *ScreenLayout + var lockedScale float64 = 1.0 // Scale from last successful layout detection + for screenLayout == nil { + frame := gocv.NewMat() + if ok := stream.Read(&frame); !ok { + log.Println("Failed to read frame for layout detection, retrying...") + time.Sleep(1 * time.Second) + frame.Close() + continue + } + if frame.Empty() || frame.Cols() < 640 || frame.Rows() < 480 { + frame.Close() + continue + } + noTs := frame.Region(image.Rect(0, 68, frame.Cols(), frame.Rows())) + rotated := gocv.NewMat() + gocv.Rotate(noTs, &rotated, gocv.Rotate90Clockwise) + noTs.Close() + + // Detect layout with normalization + var err error + screenLayout, lockedScale, err = detectLayoutWithNormalization(rotated) + rotated.Close() + frame.Close() + + if err != nil { + log.Printf(" Pulse oximeter not detected (day mode) - waiting 60 seconds...\n") + screenLayout = nil + time.Sleep(60 * time.Second) + continue + } + + log.Printf(" āœ“ Layout detected, scale locked: %.3f\n", lockedScale) + } + + log.Println("āœ“ Screen width and layout locked") + if logFile != nil { + fmt.Fprintf(logFile, " SpO2 area: X=%d, Y=%d, Width=%d, Height=%d\n", + screenLayout.SpO2Area.Min.X, screenLayout.SpO2Area.Min.Y, + screenLayout.SpO2Area.Dx(), screenLayout.SpO2Area.Dy()) + fmt.Fprintf(logFile, " HR area: X=%d, Y=%d, Width=%d, Height=%d\n", + screenLayout.HRArea.Min.X, screenLayout.HRArea.Min.Y, + screenLayout.HRArea.Dx(), screenLayout.HRArea.Dy()) + } + log.Println() + + var reviewEntries []ReviewEntry + sigChan := make(chan os.Signal, 1) + signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM) + done := make(chan bool) + go func() { + <-sigChan + log.Println("\n\nšŸ›‘ Received stop signal, finishing up...") + done <- true + }() + + frameCount := 0 + processedCount := 0 + successCount := 0 
+ failureCount := 0 + // Track last values to only save/post when changed + lastSpO2 := -1 + lastHR := -1 + // Track last reading for stability check (need 2 consecutive within 3 points) + lastReadingSpO2 := -1 + lastReadingHR := -1 + // Track pending reading (held for hindsight validation) + type PendingReading struct { + spO2 int + hr int + timestamp string + valid bool + } + pendingReading := PendingReading{valid: false} + // Track baseline before spike (for comparing against new readings) + baselineSpO2 := -1 + baselineHR := -1 + // Track retry state for layout re-detection (separate from pending logic) + inLayoutRetry := false + + mainLoop: + for { + select { + case <-done: + break mainLoop + default: + } + frame := gocv.NewMat() + if ok := stream.Read(&frame); !ok { + log.Printf("Failed to read frame, reconnecting...\n") + time.Sleep(5 * time.Second) + stream.Close() + for { + stream, err = gocv.VideoCaptureFile(config.Camera.RTSPURL) + if err == nil { + log.Println("Reconnected!") + break + } + time.Sleep(10 * time.Second) + } + continue + } + frameCount++ + if frame.Empty() || frame.Cols() < 640 || frame.Rows() < 480 { + frame.Close() + continue + } + + // Log frame dimensions (first time and periodically) + if logFile != nil && (frameCount == 1 || frameCount%100 == 0) { + fmt.Fprintf(logFile, "[INFO] Frame #%d dimensions: %dx%d\n", frameCount, frame.Cols(), frame.Rows()) + } + + // Process only every 4th frame (~4 fps at 15fps stream) + if frameCount%4 != 0 { + frame.Close() + continue + } + + timestamp := time.Now().Format("15:04:05") + frameStart := time.Now() + + // Log to file only + if logFile != nil { + fmt.Fprintf(logFile, "[%s] Frame #%d\n", timestamp, frameCount) + } + + // Preprocessing: crop timestamp, rotate, apply scale + prepStart := time.Now() + noTs := frame.Region(image.Rect(0, 68, frame.Cols(), frame.Rows())) + rotated := gocv.NewMat() + gocv.Rotate(noTs, &rotated, gocv.Rotate90Clockwise) + noTs.Close() + + // Apply locked scale + 
normalized := applyScale(rotated, lockedScale) + if normalized.Ptr() != rotated.Ptr() { + rotated.Close() + } + frameToUse := normalized + prepTime := time.Since(prepStart) + + // Log to file only + if logFile != nil { + fmt.Fprintf(logFile, " Recognizing displays...\n") + } + spo2Start := time.Now() + spo2Val, spo2Left, spo2LeftConf, spo2Right, spo2RightConf := recognizeDisplayArea(frameToUse, screenLayout.SpO2Area, templates, "SpO2", frameCount, logFile) + spo2Time := time.Since(spo2Start) + + hrStart := time.Now() + hrVal, hrLeft, hrLeftConf, hrRight, hrRightConf := recognizeDisplayArea(frameToUse, screenLayout.HRArea, templates, "HR", frameCount, logFile) + hrTime := time.Since(hrStart) + + spo2AvgConf := (spo2LeftConf + spo2RightConf) / 2.0 + hrAvgConf := (hrLeftConf + hrRightConf) / 2.0 + + // Check for invalid digits (corrupted reading) + if spo2Left == -1 || spo2Right == -1 || hrLeft == -1 || hrRight == -1 { + if logFile != nil { + fmt.Fprintf(logFile, " Invalid digit detected: SpO2(%d,%d) HR(%d,%d), reading next frame\n", spo2Left, spo2Right, hrLeft, hrRight) + } + // Skip to next frame and let normal processing handle it + frameToUse.Close() + frame.Close() + continue + } + + // Check if values changed + valuesChanged := spo2Val != lastSpO2 || hrVal != lastHR + + // Only process further if values changed + if !valuesChanged { + frameToUse.Close() + frame.Close() + continue + } + + // Values changed - NOW save debug images and timing + // Save raw processed frame for replay/testing + rawFilename := fmt.Sprintf("raw_frames/raw_%s-%05d.png", time.Now().Format("20060102"), frameCount) + gocv.IMWrite(rawFilename, frameToUse) + + // Draw visualization of stored layout + saveLayoutVisualization(frameToUse, screenLayout, fmt.Sprintf("review/f%d_boxes.jpg", frameCount)) + + totalTime := time.Since(frameStart) + + // Log detailed timing to file only + if logFile != nil { + fmt.Fprintf(logFile, " Timing: Prep=%dms, SpO2=%dms, HR=%dms, Total=%dms\n", + 
prepTime.Milliseconds(), spo2Time.Milliseconds(), hrTime.Milliseconds(), totalTime.Milliseconds()) + } + + // Values changed - update tracking + processedCount++ + lastSpO2 = spo2Val + lastHR = hrVal + + // Print to console (simple) + fmt.Printf("[%s] SpO2=%d%%, HR=%d bpm\n", timestamp, spo2Val, hrVal) + + // Add to review entries + entry := ReviewEntry{ + FrameNum: frameCount, // Use actual frame number for file references + Timestamp: timestamp, + SpO2Value: spo2Val, + SpO2LeftDigit: spo2Left, + SpO2LeftConf: spo2LeftConf, + SpO2RightDigit: spo2Right, + SpO2RightConf: spo2RightConf, + HRValue: hrVal, + HRLeftDigit: hrLeft, + HRLeftConf: hrLeftConf, + HRRightDigit: hrRight, + HRRightConf: hrRightConf, + Unstable: false, + UnstableReason: "", + } + reviewEntries = append(reviewEntries, entry) + + if spo2AvgConf > 85 && hrAvgConf > 85 { + // High confidence - check for stability with hindsight validation + + // Check if values changed too much from last reading (>3 points) + if lastReadingSpO2 != -1 && lastReadingHR != -1 { + spo2Diff := spo2Val - lastReadingSpO2 + if spo2Diff < 0 { + spo2Diff = -spo2Diff + } + hrDiff := hrVal - lastReadingHR + if hrDiff < 0 { + hrDiff = -hrDiff + } + + if spo2Diff > 3 || hrDiff > 3 { + // Values changed too much - check if we have a pending reading + if pendingReading.valid { + // We have a pending reading - check direction + spo2Direction := spo2Val - pendingReading.spO2 + hrDirection := hrVal - pendingReading.hr + + // Check if both readings moved in same direction from baseline + pendingSpo2Diff := pendingReading.spO2 - lastReadingSpO2 + pendingHrDiff := pendingReading.hr - lastReadingHR + + // Same direction if signs match (both positive or both negative) + spo2SameDir := (spo2Direction > 0 && pendingSpo2Diff > 0) || (spo2Direction < 0 && pendingSpo2Diff < 0) + hrSameDir := (hrDirection > 0 && pendingHrDiff > 0) || (hrDirection < 0 && pendingHrDiff < 0) + + if spo2SameDir && hrSameDir { + // Both readings trending same 
direction - real trend, post pending! + fmt.Printf(" āœ“ Hindsight validation: trend confirmed, posting held reading\n") + if logFile != nil { + fmt.Fprintf(logFile, " Hindsight validation: trend confirmed (SpO2: %d->%d->%d, HR: %d->%d->%d)\n", + lastReadingSpO2, pendingReading.spO2, spo2Val, + lastReadingHR, pendingReading.hr, hrVal) + } + + // Post the pending reading + postStart := time.Now() + spo2Err := postToHomeAssistant(config, "sensor.pulse_ox_spo2", pendingReading.spO2, "%", "SpO2") + hrErr := postToHomeAssistant(config, "sensor.pulse_ox_hr", pendingReading.hr, "bpm", "Heart Rate") + postTime := time.Since(postStart) + if spo2Err == nil && hrErr == nil { + successCount++ + if logFile != nil { + fmt.Fprintf(logFile, " Posted pending reading successfully in %dms (success: %d, fail: %d)\n", postTime.Milliseconds(), successCount, failureCount) + } + } else { + failureCount++ + if logFile != nil { + fmt.Fprintf(logFile, " Failed to post pending reading\n") + } + } + + // Current reading becomes new pending + pendingReading = PendingReading{ + spO2: spo2Val, + hr: hrVal, + timestamp: timestamp, + valid: true, + } + lastReadingSpO2 = spo2Val + lastReadingHR = hrVal + inLayoutRetry = false + frameToUse.Close() + frame.Close() + continue + } else { + // Opposite directions - pending was a glitch, discard it + fmt.Printf(" āš ļø Direction mismatch: discarding held reading (glitch)\n") + if logFile != nil { + fmt.Fprintf(logFile, " Direction mismatch: discarding held reading (SpO2: %d->%d->%d, HR: %d->%d->%d)\n", + lastReadingSpO2, pendingReading.spO2, spo2Val, + lastReadingHR, pendingReading.hr, hrVal) + } + pendingReading.valid = false + lastReadingSpO2 = spo2Val + lastReadingHR = hrVal + inLayoutRetry = false + frameToUse.Close() + frame.Close() + continue + } + } else { + // No pending reading yet - hold this one + if !inLayoutRetry { + inLayoutRetry = true + + // Mark this entry as unstable + entry.Unstable = true + entry.UnstableReason = fmt.Sprintf("Unstable 
(SpO2 Ī”%d, HR Ī”%d)", spo2Diff, hrDiff) + + fmt.Printf(" āš ļø Unstable reading (SpO2 Ī”%d, HR Ī”%d), holding for validation...\n", spo2Diff, hrDiff) + if logFile != nil { + fmt.Fprintf(logFile, " Unstable reading - SpO2 delta: %d, HR delta: %d, holding for validation...\n", spo2Diff, hrDiff) + } + + // Store baseline (before the spike) for comparison + baselineSpO2 = lastReadingSpO2 + baselineHR = lastReadingHR + + // Store as pending + pendingReading = PendingReading{ + spO2: spo2Val, + hr: hrVal, + timestamp: timestamp, + valid: true, + } + lastReadingSpO2 = spo2Val + lastReadingHR = hrVal + + rotated.Close() + + // Read next frame immediately + retryFrame := gocv.NewMat() + if ok := stream.Read(&retryFrame); !ok { + fmt.Printf(" Failed to read retry frame\n") + retryFrame.Close() + inLayoutRetry = false + pendingReading.valid = false + continue + } + frameCount++ + + // Re-detect layout + for { + fmt.Printf(" Re-detecting layout...\n") + if logFile != nil { + fmt.Fprintf(logFile, " Re-detecting layout...\n") + } + + noTsRetry := retryFrame.Region(image.Rect(0, 68, retryFrame.Cols(), retryFrame.Rows())) + rotatedRetry := gocv.NewMat() + gocv.Rotate(noTsRetry, &rotatedRetry, gocv.Rotate90Clockwise) + noTsRetry.Close() + + newLayout, newScale, err := detectLayoutWithNormalization(rotatedRetry) + rotatedRetry.Close() + + if err != nil { + fmt.Printf(" Pulse oximeter not detected (day mode) - waiting 60 seconds...\n") + if logFile != nil { + fmt.Fprintf(logFile, " Pulse oximeter not detected (day mode) - waiting 60 seconds...\n") + } + // Reset retry state + inLayoutRetry = false + pendingReading.valid = false + retryFrame.Close() + time.Sleep(60 * time.Second) + // Read next frame and try again + retryFrame.Close() + retryFrame = gocv.NewMat() + if ok := stream.Read(&retryFrame); !ok { + fmt.Printf(" Failed to read frame\n") + retryFrame.Close() + time.Sleep(5 * time.Second) + continue + } + continue + } + + screenLayout = newLayout + lockedScale = newScale + 
fmt.Printf(" āœ“ Layout re-detected\n") + if logFile != nil { + fmt.Fprintf(logFile, " Layout re-detected successfully\n") + } + break + } + retryFrame.Close() + continue + } else { + // Already in retry and still unstable - give up + inLayoutRetry = false + pendingReading.valid = false + lastReadingSpO2 = spo2Val + lastReadingHR = hrVal + + // Mark this entry as unstable (second try) + entry.Unstable = true + entry.UnstableReason = fmt.Sprintf("Still unstable after retry (SpO2 Ī”%d, HR Ī”%d)", spo2Diff, hrDiff) + + fmt.Printf(" āš ļø Still unstable after retry, skipping\n") + if logFile != nil { + fmt.Fprintf(logFile, " Still unstable after retry - SpO2 delta: %d, HR delta: %d\n\n", spo2Diff, hrDiff) + } + frameToUse.Close() + frame.Close() + continue + } + } + } else { + // Stable reading (Δ≤3) + if pendingReading.valid { + // Check if new reading is closer to baseline or to pending value + spo2ToBaseline := spo2Val - baselineSpO2 + if spo2ToBaseline < 0 { + spo2ToBaseline = -spo2ToBaseline + } + hrToBaseline := hrVal - baselineHR + if hrToBaseline < 0 { + hrToBaseline = -hrToBaseline + } + + spo2ToPending := spo2Val - pendingReading.spO2 + if spo2ToPending < 0 { + spo2ToPending = -spo2ToPending + } + hrToPending := hrVal - pendingReading.hr + if hrToPending < 0 { + hrToPending = -hrToPending + } + + // If closer to baseline than to pending, pending was a glitch + if spo2ToBaseline <= spo2ToPending && hrToBaseline <= hrToPending { + fmt.Printf(" āœ“ New reading closer to baseline (%d,%d) than pending (%d,%d), discarding glitch\n", + baselineSpO2, baselineHR, pendingReading.spO2, pendingReading.hr) + if logFile != nil { + fmt.Fprintf(logFile, " Discarding pending glitch: baseline %d->pending %d->current %d (SpO2), baseline %d->pending %d->current %d (HR)\n", + baselineSpO2, pendingReading.spO2, spo2Val, + baselineHR, pendingReading.hr, hrVal) + } + pendingReading.valid = false + // Fall through to post current reading + } else { + // Closer to pending - real 
trend, post pending first + fmt.Printf(" āœ“ Stable reading confirms held value, posting both\n") + if logFile != nil { + fmt.Fprintf(logFile, " Stable reading confirms held value: %d->%d->%d (SpO2), %d->%d->%d (HR)\n", + baselineSpO2, pendingReading.spO2, spo2Val, + baselineHR, pendingReading.hr, hrVal) + } + + postStart := time.Now() + spo2Err := postToHomeAssistant(config, "sensor.pulse_ox_spo2", pendingReading.spO2, "%", "SpO2") + hrErr := postToHomeAssistant(config, "sensor.pulse_ox_hr", pendingReading.hr, "bpm", "Heart Rate") + postTime := time.Since(postStart) + if spo2Err == nil && hrErr == nil { + successCount++ + if logFile != nil { + fmt.Fprintf(logFile, " Posted pending reading successfully in %dms (success: %d, fail: %d)\n", postTime.Milliseconds(), successCount, failureCount) + } + } + pendingReading.valid = false + } + } + // Fall through to post current reading + } + } + + // Values are stable (within 3 points) - post to Home Assistant + inLayoutRetry = false // Reset retry flag on success + lastReadingSpO2 = spo2Val + lastReadingHR = hrVal + postStart := time.Now() + spo2Err := postToHomeAssistant(config, "sensor.pulse_ox_spo2", spo2Val, "%", "SpO2") + hrErr := postToHomeAssistant(config, "sensor.pulse_ox_hr", hrVal, "bpm", "Heart Rate") + postTime := time.Since(postStart) + if spo2Err == nil && hrErr == nil { + successCount++ + if logFile != nil { + fmt.Fprintf(logFile, " Posted successfully in %dms (success: %d, fail: %d)\n\n", postTime.Milliseconds(), successCount, failureCount) + } + } else { + failureCount++ + fmt.Printf(" āŒ Post failed\n") + if logFile != nil { + if spo2Err != nil { + fmt.Fprintf(logFile, " SpO2 post error: %v\n", spo2Err) + } + if hrErr != nil { + fmt.Fprintf(logFile, " HR post error: %v\n", hrErr) + } + fmt.Fprintf(logFile, " (success: %d, fail: %d)\n\n", successCount, failureCount) + } + } + frameToUse.Close() + frame.Close() + } else { + failureCount++ + + // Update last reading even on low confidence (for next 
comparison) + lastReadingSpO2 = spo2Val + lastReadingHR = hrVal + + if !inLayoutRetry { + // First failure - retry with next frame and re-detect layout + inLayoutRetry = true + fmt.Printf(" āš ļø Low confidence, retrying with next frame...\n") + if logFile != nil { + fmt.Fprintf(logFile, " Low confidence - SpO2: %.1f%%, HR: %.1f%%, retrying...\n", spo2AvgConf, hrAvgConf) + } + + rotated.Close() + + // Read next frame immediately + retryFrame := gocv.NewMat() + if ok := stream.Read(&retryFrame); !ok { + fmt.Printf(" Failed to read retry frame\n") + retryFrame.Close() + inLayoutRetry = false + continue + } + frameCount++ + timestamp = time.Now().Format("15:04:05") + + // Re-detect layout + for { + fmt.Printf(" Re-detecting layout...\n") + if logFile != nil { + fmt.Fprintf(logFile, " Re-detecting layout...\n") + } + + noTsRetry := retryFrame.Region(image.Rect(0, 68, retryFrame.Cols(), retryFrame.Rows())) + rotated = gocv.NewMat() + gocv.Rotate(noTsRetry, &rotated, gocv.Rotate90Clockwise) + noTsRetry.Close() + + newLayout, newScale, err := detectLayoutWithNormalization(rotated) + if err != nil { + fmt.Printf(" Pulse oximeter not detected (day mode) - waiting 60 seconds...\n") + if logFile != nil { + fmt.Fprintf(logFile, " Pulse oximeter not detected (day mode) - waiting 60 seconds...\n") + } + // Reset retry state + inLayoutRetry = false + rotated.Close() + retryFrame.Close() + time.Sleep(60 * time.Second) + // Read next frame and try again + retryFrame.Close() + retryFrame = gocv.NewMat() + if ok := stream.Read(&retryFrame); !ok { + fmt.Printf(" Failed to read frame\n") + retryFrame.Close() + time.Sleep(5 * time.Second) + continue + } + continue + } + + screenLayout = newLayout + lockedScale = newScale + fmt.Printf(" āœ“ Layout re-detected, processing retry frame\n") + if logFile != nil { + fmt.Fprintf(logFile, " Layout re-detected successfully, processing frame #%d\n", frameCount) + fmt.Fprintf(logFile, " SpO2 area: X=%d, Y=%d, Width=%d, Height=%d\n", + 
screenLayout.SpO2Area.Min.X, screenLayout.SpO2Area.Min.Y, + screenLayout.SpO2Area.Dx(), screenLayout.SpO2Area.Dy()) + fmt.Fprintf(logFile, " HR area: X=%d, Y=%d, Width=%d, Height=%d\n", + screenLayout.HRArea.Min.X, screenLayout.HRArea.Min.Y, + screenLayout.HRArea.Dx(), screenLayout.HRArea.Dy()) + } + break + } + + // Apply the new scale to the retry frame + normalized := applyScale(rotated, lockedScale) + if normalized.Ptr() != rotated.Ptr() { + rotated.Close() + rotated = normalized + } + + // Now process this retry frame (fall through to loop which will read frame 248 next) + saveLayoutVisualization(rotated, screenLayout, fmt.Sprintf("review/f%d_boxes.jpg", frameCount)) + + if logFile != nil { + fmt.Fprintf(logFile, " Recognizing displays...\n") + } + spo2Val, spo2Left, spo2LeftConf, spo2Right, spo2RightConf = recognizeDisplayArea(rotated, screenLayout.SpO2Area, templates, "SpO2", frameCount, logFile) + hrVal, hrLeft, hrLeftConf, hrRight, hrRightConf = recognizeDisplayArea(rotated, screenLayout.HRArea, templates, "HR", frameCount, logFile) + + spo2AvgConf = (spo2LeftConf + spo2RightConf) / 2.0 + hrAvgConf = (hrLeftConf + hrRightConf) / 2.0 + + // Check if retry succeeded with high confidence + if spo2AvgConf > 85 && hrAvgConf > 85 { + // Retry succeeded - check if values changed + valuesChanged = spo2Val != lastSpO2 || hrVal != lastHR + if !valuesChanged { + rotated.Close() + retryFrame.Close() + frame.Close() + inLayoutRetry = false + continue + } + + // Values changed - update + processedCount++ + lastSpO2 = spo2Val + lastHR = hrVal + fmt.Printf("[%s] SpO2=%d%%, HR=%d bpm\n", timestamp, spo2Val, hrVal) + + entry = ReviewEntry{ + FrameNum: frameCount, + Timestamp: timestamp, + SpO2Value: spo2Val, + SpO2LeftDigit: spo2Left, + SpO2LeftConf: spo2LeftConf, + SpO2RightDigit: spo2Right, + SpO2RightConf: spo2RightConf, + HRValue: hrVal, + HRLeftDigit: hrLeft, + HRLeftConf: hrLeftConf, + HRRightDigit: hrRight, + HRRightConf: hrRightConf, + Unstable: false, + 
UnstableReason: "", + } + reviewEntries = append(reviewEntries, entry) + + // Now perform stability checks (same as high confidence path above) + if lastReadingSpO2 != -1 && lastReadingHR != -1 { + spo2Diff := spo2Val - lastReadingSpO2 + if spo2Diff < 0 { + spo2Diff = -spo2Diff + } + hrDiff := hrVal - lastReadingHR + if hrDiff < 0 { + hrDiff = -hrDiff + } + + if spo2Diff <= 3 && hrDiff <= 3 { + // Stable - post it + inLayoutRetry = false + lastReadingSpO2 = spo2Val + lastReadingHR = hrVal + postStart := time.Now() + spo2Err := postToHomeAssistant(config, "sensor.pulse_ox_spo2", spo2Val, "%", "SpO2") + hrErr := postToHomeAssistant(config, "sensor.pulse_ox_hr", hrVal, "bpm", "Heart Rate") + postTime := time.Since(postStart) + if spo2Err == nil && hrErr == nil { + successCount++ + if logFile != nil { + fmt.Fprintf(logFile, " Posted successfully in %dms (success: %d, fail: %d)\n\n", postTime.Milliseconds(), successCount, failureCount) + } + } else { + failureCount++ + } + rotated.Close() + retryFrame.Close() + frame.Close() + continue + } + } + + // First reading or stable - post it + inLayoutRetry = false + lastReadingSpO2 = spo2Val + lastReadingHR = hrVal + postStart := time.Now() + spo2Err := postToHomeAssistant(config, "sensor.pulse_ox_spo2", spo2Val, "%", "SpO2") + hrErr := postToHomeAssistant(config, "sensor.pulse_ox_hr", hrVal, "bpm", "Heart Rate") + postTime := time.Since(postStart) + if spo2Err == nil && hrErr == nil { + successCount++ + if logFile != nil { + fmt.Fprintf(logFile, " Posted successfully in %dms (success: %d, fail: %d)\n\n", postTime.Milliseconds(), successCount, failureCount) + } + } else { + failureCount++ + } + rotated.Close() + retryFrame.Close() + frame.Close() + continue + } + // If still low confidence, fall through to "second failure" below + } else { + // Second failure - give up and wait + inLayoutRetry = false + fmt.Printf(" āš ļø Low confidence after retry, pausing 2 seconds\n") + if logFile != nil { + fmt.Fprintf(logFile, " Low 
confidence after retry - SpO2: %.1f%%, HR: %.1f%% (success: %d, fail: %d)\n\n", spo2AvgConf, hrAvgConf, successCount, failureCount) + } + time.Sleep(2 * time.Second) + } + } + frameToUse.Close() + frame.Close() + } + + fmt.Println("\nšŸ“ Writing review.html...") + if err := writeReviewHTML(reviewEntries); err != nil { + fmt.Printf("Error writing review HTML: %v\n", err) + } else { + fmt.Printf("āœ“ Review page created: review/review.html (%d frames)\n", len(reviewEntries)) + fmt.Println(" Open it in browser to review recognition results") + fmt.Println(" Copy good digits: cp review/f5_spo2_digit2.png training_digits/9_2.png") + } +} diff --git a/backups/backup_20251122/detection.go b/backups/backup_20251122/detection.go new file mode 100644 index 0000000..e2a6b22 --- /dev/null +++ b/backups/backup_20251122/detection.go @@ -0,0 +1,530 @@ +package main + +import ( + "fmt" + "image" + "image/color" + "math" + "sort" + + "gocv.io/x/gocv" +) + +// Global DEBUG flag is declared in test_rotation.go + +// Band represents a vertical band of contours +type Band struct { + minX int + maxX int + minY int + maxY int +} + +// DetectBands finds and groups contours into vertical bands +// Only returns bands with height >= minHeight pixels +func DetectBands(binary gocv.Mat, minHeight int) []Band { + type Box struct { + minX int + maxX int + minY int + maxY int + } + + // Find contours + contours := gocv.FindContours(binary, gocv.RetrievalExternal, gocv.ChainApproxSimple) + + // Get all bounding boxes + boxes := []Box{} + for i := 0; i < contours.Size(); i++ { + rect := gocv.BoundingRect(contours.At(i)) + boxes = append(boxes, Box{ + minX: rect.Min.X, + maxX: rect.Max.X, + minY: rect.Min.Y, + maxY: rect.Max.Y, + }) + } + + // Sort boxes by minY + sort.Slice(boxes, func(i, j int) bool { + return boxes[i].minY < boxes[j].minY + }) + + // Group overlapping/adjacent boxes into vertical bands + bands := []Band{} + + for _, box := range boxes { + // Skip boxes shorter than minHeight + 
height := box.maxY - box.minY + if height < minHeight { + continue + } + + if len(bands) == 0 { + // First band + bands = append(bands, Band{ + minX: box.minX, + maxX: box.maxX, + minY: box.minY, + maxY: box.maxY, + }) + } else { + lastBand := &bands[len(bands)-1] + + // Check if this box overlaps or is close to last band (within 5px) + if box.minY <= lastBand.maxY+5 { + // Merge: extend the band in both X and Y + if box.minX < lastBand.minX { + lastBand.minX = box.minX + } + if box.maxX > lastBand.maxX { + lastBand.maxX = box.maxX + } + if box.maxY > lastBand.maxY { + lastBand.maxY = box.maxY + } + } else { + // New band + bands = append(bands, Band{ + minX: box.minX, + maxX: box.maxX, + minY: box.minY, + maxY: box.maxY, + }) + } + } + } + + return bands +} + +// findBaseline analyzes the baseline in the graph band +// Returns left point, right point, width, number of sections, and error +func findBaseline(binary gocv.Mat, graphBand Band) (*image.Point, *image.Point, int, int, error) { + // Extract just the graph band region + graphRegion := binary.Region(image.Rect(0, graphBand.minY, binary.Cols(), graphBand.maxY+1)) + + // Find all contours in graph region + contours := gocv.FindContours(graphRegion, gocv.RetrievalExternal, gocv.ChainApproxSimple) + + // Get sections (contours) with width >= 5px + type Section struct { + minX int + maxX int + minY int + maxY int + } + sections := []Section{} + + for i := 0; i < contours.Size(); i++ { + rect := gocv.BoundingRect(contours.At(i)) + width := rect.Max.X - rect.Min.X + height := rect.Max.Y - rect.Min.Y + // Filter: width >= 5px AND height >= 80px + if width >= 5 && height >= 80 { + sections = append(sections, Section{ + minX: rect.Min.X, + maxX: rect.Max.X, + minY: rect.Min.Y + graphBand.minY, // Adjust to full image coordinates + maxY: rect.Max.Y + graphBand.minY, + }) + } + } + + // Check if we have exactly 2 sections + if len(sections) != 2 { + return nil, nil, 0, len(sections), fmt.Errorf("found %d sections (need 
exactly 2)", len(sections)) + } + + // Sort sections by X position (left to right) + sort.Slice(sections, func(i, j int) bool { + return sections[i].minX < sections[j].minX + }) + + // Horizontal width (left edge of left section to right edge of right section) + horizontalWidth := sections[1].maxX - sections[0].minX + + // Find rotation points + // Left section: lowest white pixel 3px from left edge + leftSection := sections[0] + leftX := leftSection.minX + 3 + leftPoint := findLowestWhitePixel(binary, leftX, graphBand.minY, graphBand.maxY) + + // Right section: lowest white pixel 3px from right edge + rightSection := sections[1] + rightX := rightSection.maxX - 3 + rightPoint := findLowestWhitePixel(binary, rightX, graphBand.minY, graphBand.maxY) + + if leftPoint == nil || rightPoint == nil { + return nil, nil, 0, len(sections), fmt.Errorf("could not find rotation endpoints") + } + + return leftPoint, rightPoint, horizontalWidth, len(sections), nil +} + +// findLowestWhitePixel finds the lowest (max Y) white pixel at given X within Y range +func findLowestWhitePixel(binary gocv.Mat, x int, minY int, maxY int) *image.Point { + lowestY := -1 + for y := minY; y <= maxY; y++ { + if binary.GetUCharAt(y, x) == 255 { + lowestY = y + } + } + if lowestY == -1 { + return nil + } + return &image.Point{X: x, Y: lowestY} +} + +// DetectionResult contains rotation and width detection results +type DetectionResult struct { + Rotation float64 + Width int + Success bool +} + +// PreprocessFrame takes a raw frame and prepares it for detection +// Returns the processed binary image +func PreprocessFrame(frame gocv.Mat) gocv.Mat { + // Crop top 68 pixels (timestamp) + cropped := frame.Region(image.Rect(0, 68, frame.Cols(), frame.Rows())) + + // Rotate 90° clockwise + rotated := gocv.NewMat() + gocv.Rotate(cropped, &rotated, gocv.Rotate90Clockwise) + + // Convert to grayscale + gray := gocv.NewMat() + gocv.CvtColor(rotated, &gray, gocv.ColorBGRToGray) + rotated.Close() + + // Threshold 
to binary + binary := gocv.NewMat() + gocv.Threshold(gray, &binary, 170, 255, gocv.ThresholdBinary) + gray.Close() + + return binary +} + +// DetectRotationAndWidth analyzes a binary frame and returns rotation angle and width +func DetectRotationAndWidth(binary gocv.Mat) DetectionResult { + // Detect bands (using 80px minimum height for original scale) + bands := DetectBands(binary, 80) + + // Check if we have enough bands + if len(bands) < 2 { + if DEBUG { + fmt.Printf("DEBUG: Detection failed - only %d bands found (need at least 2)\n", len(bands)) + } + return DetectionResult{Success: false} + } + + // Find TALLEST band (graph/waveform display) + tallestIdx := 0 + tallestHeight := 0 + for i, band := range bands { + height := band.maxY - band.minY + if height > tallestHeight { + tallestHeight = height + tallestIdx = i + } + } + + // Check if there's a band after the tallest (for digits) + if tallestIdx >= len(bands)-1 { + if DEBUG { + fmt.Printf("DEBUG: Detection failed - tallest band is last (idx=%d, total=%d)\n", tallestIdx, len(bands)) + } + return DetectionResult{Success: false} + } + + graphBand := bands[tallestIdx] // Tallest = graph/waveform + + // Find baseline + leftPoint, rightPoint, width, sections, err := findBaseline(binary, graphBand) + if err != nil || sections != 2 { + if DEBUG { + if err != nil { + fmt.Printf("DEBUG: Detection failed - baseline error: %v\n", err) + } else { + fmt.Printf("DEBUG: Detection failed - found %d sections (need exactly 2)\n", sections) + } + } + return DetectionResult{Success: false} + } + + // Calculate rotation angle + deltaY := float64(rightPoint.Y - leftPoint.Y) + deltaX := float64(rightPoint.X - leftPoint.X) + rotation := math.Atan2(deltaY, deltaX) * 180 / math.Pi + + return DetectionResult{ + Rotation: rotation, + Width: width, + Success: true, + } +} + +// VisualizeBands draws bands on an image with labels (only in DEBUG mode) +func VisualizeBands(binary gocv.Mat, bands []Band, tallestIdx int, filename string) { + 
if !DEBUG { + return + } + + viz := gocv.NewMat() + gocv.CvtColor(binary, &viz, gocv.ColorGrayToBGR) + defer viz.Close() + + red := color.RGBA{R: 255, G: 0, B: 0, A: 255} + green := color.RGBA{R: 0, G: 255, B: 0, A: 255} + blue := color.RGBA{R: 0, G: 0, B: 255, A: 255} + yellow := color.RGBA{R: 255, G: 255, B: 0, A: 255} + + // Draw all bands + for i, band := range bands { + bandColor := red + if i == tallestIdx { + bandColor = green // Graph band in green + } else if tallestIdx < len(bands)-1 && i == tallestIdx+1 { + bandColor = blue // Numbers band in blue + } + + rect := image.Rect(0, band.minY, viz.Cols(), band.maxY) + gocv.Rectangle(&viz, rect, bandColor, 2) + + label := fmt.Sprintf("Band #%d", i+1) + gocv.PutText(&viz, label, image.Pt(10, band.minY+20), + gocv.FontHersheyPlain, 1.5, yellow, 2) + } + + gocv.IMWrite(filename, viz) + fmt.Printf("DEBUG: Saved visualization to %s\n", filename) +} + +// VisualizeDigitBand highlights the digit band specifically +func VisualizeDigitBand(binary gocv.Mat, bands []Band, tallestIdx int, filename string) { + if !DEBUG || tallestIdx >= len(bands)-1 { + return + } + + viz := gocv.NewMat() + gocv.CvtColor(binary, &viz, gocv.ColorGrayToBGR) + defer viz.Close() + + red := color.RGBA{R: 255, G: 0, B: 0, A: 255} + green := color.RGBA{R: 0, G: 255, B: 0, A: 255} + blue := color.RGBA{R: 0, G: 0, B: 255, A: 255} + yellow := color.RGBA{R: 255, G: 255, B: 0, A: 255} + cyan := color.RGBA{R: 0, G: 255, B: 255, A: 255} + + // Draw all bands with specific highlighting + for i, band := range bands { + bandColor := red + lineThickness := 1 + labelText := fmt.Sprintf("Band #%d", i+1) + + if i == tallestIdx { + bandColor = green + labelText = "GRAPH BAND" + } else if i == tallestIdx+1 { + // DIGIT BAND - highlight strongly + bandColor = cyan + lineThickness = 3 + labelText = fmt.Sprintf("DIGIT BAND [Y: %d-%d]", band.minY, band.maxY) + } + + rect := image.Rect(0, band.minY, viz.Cols(), band.maxY) + gocv.Rectangle(&viz, rect, bandColor, 
lineThickness) + + // Add label + labelColor := yellow + if i == tallestIdx+1 { + labelColor = cyan + } + gocv.PutText(&viz, labelText, image.Pt(10, band.minY+20), + gocv.FontHersheyPlain, 1.5, labelColor, 2) + } + + // Draw additional markers for digit band + if tallestIdx < len(bands)-1 { + digitBand := bands[tallestIdx+1] + + // Draw vertical lines at digit band boundaries + for x := 50; x < viz.Cols(); x += 100 { + gocv.Line(&viz, + image.Pt(x, digitBand.minY), + image.Pt(x, digitBand.maxY), + blue, 1) + } + + // Add dimension text + height := digitBand.maxY - digitBand.minY + dimText := fmt.Sprintf("Height: %dpx, Width: 860px", height) + gocv.PutText(&viz, dimText, image.Pt(10, digitBand.maxY+30), + gocv.FontHersheyPlain, 1.2, cyan, 2) + } + + gocv.IMWrite(filename, viz) + fmt.Printf("DEBUG: Saved digit band visualization to %s\n", filename) +} + +// ValidateBaselineTwoSegments checks if the baseline consists of two separate segments +// by counting black-to-white transitions along the baseline (should be exactly 1) +func ValidateBaselineTwoSegments(binary gocv.Mat, leftPoint, rightPoint *image.Point) bool { + // Calculate slope for Y interpolation along the baseline + deltaY := float64(rightPoint.Y - leftPoint.Y) + deltaX := float64(rightPoint.X - leftPoint.X) + slope := deltaY / deltaX + + if DEBUG { + fmt.Printf("DEBUG: Validating baseline segments (slope=%.4f) from X=%d to X=%d\n", + slope, leftPoint.X, rightPoint.X) + } + + // Count black-to-white transitions + transitions := 0 + lastWasWhite := false + + for x := leftPoint.X; x <= rightPoint.X; x++ { + // Interpolate Y position along the baseline, then go 2px up (away from thick edge) + y := leftPoint.Y + int(float64(x-leftPoint.X)*slope) - 2 + isWhite := binary.GetUCharAt(y, x) == 255 + + // Detect black-to-white transition + if isWhite && !lastWasWhite { + transitions++ + } + lastWasWhite = isWhite + } + + if DEBUG { + fmt.Printf("DEBUG: Found %d black-to-white transitions\n", transitions) + } + + // 
We need exactly 1 black-to-white transition for 2 segments + // (entering the second segment after the gap) + if transitions != 1 { + if DEBUG { + fmt.Printf("DEBUG: INVALID - Need exactly 1 transition, found %d\n", transitions) + } + return false + } + + if DEBUG { + fmt.Println("DEBUG: VALID - Baseline has two distinct segments") + } + return true +} + +// RotateImage rotates an image by the given angle in degrees +// The angle is the detected rotation that needs to be corrected +func RotateImage(img gocv.Mat, angleDegrees float64) gocv.Mat { + // Get image center + center := image.Point{ + X: img.Cols() / 2, + Y: img.Rows() / 2, + } + + // Apply rotation to correct the tilt + // NOTE: If image rotates wrong direction, change to: -angleDegrees + rotMat := gocv.GetRotationMatrix2D(center, angleDegrees, 1.0) + defer rotMat.Close() + + // Apply rotation + rotated := gocv.NewMat() + gocv.WarpAffine(img, &rotated, rotMat, image.Point{X: img.Cols(), Y: img.Rows()}) + + if DEBUG { + fmt.Printf("DEBUG: Applied rotation: %.3f°\n", angleDegrees) + } + + return rotated +} + +// ScaleToWidth scales an image to the specified width while maintaining aspect ratio +func ScaleToWidth(img gocv.Mat, targetWidth int) gocv.Mat { + // Calculate scale factor + currentWidth := img.Cols() + scale := float64(targetWidth) / float64(currentWidth) + + // Calculate new dimensions + newHeight := int(float64(img.Rows()) * scale) + + // Resize image + scaled := gocv.NewMat() + gocv.Resize(img, &scaled, image.Point{X: targetWidth, Y: newHeight}, 0, 0, gocv.InterpolationLinear) + + if DEBUG { + fmt.Printf("DEBUG: Scaled image from %dx%d to %dx%d (scale factor: %.3f)\n", + currentWidth, img.Rows(), targetWidth, newHeight, scale) + } + + return scaled +} + +// VisualizeSimpleBands draws just the graph and digit bands clearly +func VisualizeSimpleBands(binary gocv.Mat, graphBand, digitBand Band, filename string) { + if !DEBUG { + return + } + + viz := gocv.NewMat() + gocv.CvtColor(binary, &viz, 
gocv.ColorGrayToBGR) + defer viz.Close() + + green := color.RGBA{R: 0, G: 255, B: 0, A: 255} + cyan := color.RGBA{R: 0, G: 255, B: 255, A: 255} + + // Draw graph band in green + graphRect := image.Rect(0, graphBand.minY, viz.Cols(), graphBand.maxY) + gocv.Rectangle(&viz, graphRect, green, 2) + gocv.PutText(&viz, "GRAPH BAND", image.Pt(10, graphBand.minY+20), + gocv.FontHersheyPlain, 1.5, green, 2) + + // Draw digit band in cyan with thick border + digitRect := image.Rect(0, digitBand.minY, viz.Cols(), digitBand.maxY) + gocv.Rectangle(&viz, digitRect, cyan, 3) + label := fmt.Sprintf("DIGIT BAND [Y: %d-%d, H: %dpx]", + digitBand.minY, digitBand.maxY, digitBand.maxY-digitBand.minY) + gocv.PutText(&viz, label, image.Pt(10, digitBand.minY+20), + gocv.FontHersheyPlain, 1.5, cyan, 2) + + gocv.IMWrite(filename, viz) + fmt.Printf("DEBUG: Saved 2-band visualization to %s\n", filename) +} + +// ExtractBandRegion extracts a specific band region from an image +func ExtractBandRegion(img gocv.Mat, band Band) gocv.Mat { + // Create a region of interest (ROI) for the band + rect := image.Rect(0, band.minY, img.Cols(), band.maxY) + region := img.Region(rect) + + // Clone the region to create an independent Mat + extracted := gocv.NewMat() + region.CopyTo(&extracted) + + if DEBUG { + fmt.Printf("DEBUG: Extracted band region [%d-%d], size: %dx%d\n", + band.minY, band.maxY, extracted.Cols(), extracted.Rows()) + } + + return extracted +} + +// ScaleByFactor scales an image by a specific scale factor +func ScaleByFactor(img gocv.Mat, scaleFactor float64) gocv.Mat { + // Calculate new dimensions + newWidth := int(float64(img.Cols()) * scaleFactor) + newHeight := int(float64(img.Rows()) * scaleFactor) + + // Resize image + scaled := gocv.NewMat() + gocv.Resize(img, &scaled, image.Point{X: newWidth, Y: newHeight}, 0, 0, gocv.InterpolationLinear) + + if DEBUG { + fmt.Printf("DEBUG: Scaled image from %dx%d to %dx%d (scale factor: %.3f)\n", + img.Cols(), img.Rows(), newWidth, newHeight, 
scaleFactor) + } + + return scaled +} diff --git a/backups/backup_20251122/layout_detection.go b/backups/backup_20251122/layout_detection.go new file mode 100644 index 0000000..0384854 --- /dev/null +++ b/backups/backup_20251122/layout_detection.go @@ -0,0 +1,957 @@ +package main + +import ( + "fmt" + "image" + "image/color" + "math" + "sort" + "time" + + "gocv.io/x/gocv" +) + +type Display struct { + IsSplit bool // true = one box split in half, false = two separate boxes + FullRect image.Rectangle // used when IsSplit=true + LeftRect image.Rectangle // used when IsSplit=false + RightRect image.Rectangle // used when IsSplit=false +} + +type ScreenLayout struct { + SpO2Area image.Rectangle + HRArea image.Rectangle +} + +// rotateImage rotates an image by the specified angle (in degrees) +// Positive angle = counter-clockwise, negative = clockwise +func rotateImage(src gocv.Mat, angleDegrees float64) gocv.Mat { + logMessage(Both, Info, " [rotateImage] Input: %dx%d, Angle: %.3f°", src.Cols(), src.Rows(), angleDegrees) + + // Get image center + center := image.Pt(src.Cols()/2, src.Rows()/2) + logMessage(Both, Info, " [rotateImage] Center: (%d,%d)", center.X, center.Y) + + // Get rotation matrix (angle in degrees, scale=1.0) + rotMat := gocv.GetRotationMatrix2D(center, angleDegrees, 1.0) + defer rotMat.Close() + + logMessage(Both, Info, " [rotateImage] Rotation matrix created") + + // Apply rotation + rotated := gocv.NewMat() + gocv.WarpAffineWithParams(src, &rotated, rotMat, image.Pt(src.Cols(), src.Rows()), + gocv.InterpolationLinear, gocv.BorderConstant, color.RGBA{0, 0, 0, 0}) + + logMessage(Both, Info, " [rotateImage] Output: %dx%d, Empty: %v", rotated.Cols(), rotated.Rows(), rotated.Empty()) + + return rotated +} + +// calculateRotationAngle determines the rotation angle needed to straighten the frame +// by analyzing the bottom horizontal boxes (status indicators below 80% Y) +func calculateRotationAngle(preprocessed gocv.Mat) float64 { + const 
ROTATION_THRESHOLD = 0.5 // Only rotate if angle > 0.5 degrees + + // Threshold to find contours + gray := gocv.NewMat() + gocv.CvtColor(preprocessed, &gray, gocv.ColorBGRToGray) + defer gray.Close() + + thresh := gocv.NewMat() + gocv.Threshold(gray, &thresh, 240, 255, gocv.ThresholdBinary) + defer thresh.Close() + + // Find all contours + contours := gocv.FindContours(thresh, gocv.RetrievalList, gocv.ChainApproxSimple) + defer contours.Close() + + if contours.Size() == 0 { + logMessage(Both, Warning, " [ROTATION] No contours found - no rotation") + return 0.0 + } + + logMessage(Both, Info, " [ROTATION] Found %d contours", contours.Size()) + + // FIRST: Calculate bounding box from ALL contours (no filtering) + // This gives us the true extent of the display + if contours.Size() == 0 { + logMessage(Both, Warning, " [ROTATION] No contours - cannot calculate bounding box") + return 0.0 + } + + firstRect := gocv.BoundingRect(contours.At(0)) + minX := firstRect.Min.X + minY := firstRect.Min.Y + maxX := firstRect.Max.X + maxY := firstRect.Max.Y + + for i := 1; i < contours.Size(); i++ { + rect := gocv.BoundingRect(contours.At(i)) + if rect.Min.X < minX { minX = rect.Min.X } + if rect.Min.Y < minY { minY = rect.Min.Y } + if rect.Max.X > maxX { maxX = rect.Max.X } + if rect.Max.Y > maxY { maxY = rect.Max.Y } + } + + boundingBoxHeight := maxY - minY + logMessage(Both, Info, " [ROTATION] Bounding box from ALL contours: Y=%d-%d (H=%d)", minY, maxY, boundingBoxHeight) + + // THEN: Get bounding boxes for filtered contours (for rotation calculation) + var boxes []image.Rectangle + for i := 0; i < contours.Size(); i++ { + rect := gocv.BoundingRect(contours.At(i)) + // Filter out tiny boxes + if rect.Dx() > 20 && rect.Dy() > 10 { + boxes = append(boxes, rect) + } + } + + logMessage(Both, Info, " [ROTATION] %d boxes after size filter (W>20, H>10)", len(boxes)) + + if len(boxes) < 3 { + logMessage(Both, Warning, " [ROTATION] Only %d boxes found - need at least 3", len(boxes)) + 
return 0.0 + } + + // Calculate 80% Y threshold relative to bounding box (bottom 20% of bounding box) + threshold80 := minY + int(float64(boundingBoxHeight)*0.8) + logMessage(Both, Info, " [ROTATION] 80%%%% threshold: Y>%d", threshold80) + + // Filter to boxes in bottom 20% (Y > 80% of height) + var bottomBoxes []image.Rectangle + for _, box := range boxes { + // Use box center Y for filtering + centerY := box.Min.Y + box.Dy()/2 + if centerY > threshold80 { + bottomBoxes = append(bottomBoxes, box) + } + } + + logMessage(Both, Info, " [ROTATION] %d boxes in bottom 20%%%% (Y>%d)", len(bottomBoxes), threshold80) + + if len(bottomBoxes) < 3 { + logMessage(Both, Warning, " [ROTATION] Only %d boxes in bottom 20%%%% - need at least 3", len(bottomBoxes)) + return 0.0 + } + + // Sort by Y coordinate (descending = bottom to top) to get lowest boxes + sort.Slice(bottomBoxes, func(i, j int) bool { + return bottomBoxes[i].Min.Y > bottomBoxes[j].Min.Y + }) + + // Take bottom 3 boxes (highest Y coordinates) + topBoxes := bottomBoxes[:3] + logMessage(Both, Info, " [ROTATION] Using 3 lowest boxes:") + for i, box := range topBoxes { + centerY := box.Min.Y + box.Dy()/2 + logMessage(Both, Info, " Box %d: X=%d-%d, Y=%d-%d (centerY=%d)", + i, box.Min.X, box.Max.X, box.Min.Y, box.Max.Y, centerY) + } + + // DEBUG: Save visualization of the three boxes used for rotation + if DEBUG_MODE { + rotDebugVis := preprocessed.Clone() // Clone the colored frame + + // Create bounding box rectangle from calculated values + boundingBox := image.Rect(minX, minY, maxX, maxY) + + // Draw bounding box in blue + gocv.Rectangle(&rotDebugVis, boundingBox, color.RGBA{0, 100, 255, 255}, 3) + + // Draw 80% threshold line in red (this is what we use to filter boxes) + gocv.Line(&rotDebugVis, + image.Pt(0, threshold80), + image.Pt(preprocessed.Cols(), threshold80), + color.RGBA{255, 0, 0, 255}, 3) // Red line at threshold + + // Draw the three selected boxes in green + for _, box := range topBoxes { + 
gocv.Rectangle(&rotDebugVis, box, color.RGBA{0, 255, 0, 255}, 3) + } + + // Add labels + gocv.PutText(&rotDebugVis, + fmt.Sprintf("80%%%% threshold: Y=%d", threshold80), + image.Pt(10, threshold80-10), + gocv.FontHersheyPlain, 1.5, color.RGBA{255, 0, 0, 255}, 2) + gocv.PutText(&rotDebugVis, + fmt.Sprintf("BBox: %dx%d (Y=%d-%d)", boundingBox.Dx(), boundingBox.Dy(), minY, maxY), + image.Pt(minX+10, minY+30), + gocv.FontHersheyPlain, 1.5, color.RGBA{0, 100, 255, 255}, 2) + + gocv.IMWrite("test_output/rotation_boxes_BEFORE.png", rotDebugVis) + rotDebugVis.Close() + logMessage(Both, Info, " [ROTATION] Saved rotation_boxes_BEFORE.png") + } + + // Find leftmost and rightmost boxes + var leftBox, rightBox image.Rectangle + leftBox = topBoxes[0] + rightBox = topBoxes[0] + + for _, box := range topBoxes[1:] { + if box.Min.X < leftBox.Min.X { + leftBox = box + } + if box.Min.X > rightBox.Min.X { + rightBox = box + } + } + + // Get center Y of each box + leftY := leftBox.Min.Y + leftBox.Dy()/2 + rightY := rightBox.Min.Y + rightBox.Dy()/2 + leftX := leftBox.Min.X + leftBox.Dx()/2 + rightX := rightBox.Min.X + rightBox.Dx()/2 + + logMessage(Both, Info, " [ROTATION] Left box center: (%d,%d), Right box center: (%d,%d)", leftX, leftY, rightX, rightY) + + // Calculate rotation angle + if rightX == leftX { + logMessage(Both, Info, " [ROTATION] Boxes vertically aligned - no rotation") + return 0.0 + } + + slope := float64(rightY-leftY) / float64(rightX-leftX) + angleDeg := math.Atan(slope) * 180.0 / math.Pi + + logMessage(Both, Info, " [ROTATION] Slope=%.4f, Angle=%.3f°", slope, angleDeg) + + // Apply threshold + if angleDeg > -ROTATION_THRESHOLD && angleDeg < ROTATION_THRESHOLD { + logMessage(Both, Info, " [ROTATION] Angle %.3f° below threshold %.1f° - no rotation", angleDeg, ROTATION_THRESHOLD) + return 0.0 + } + + logMessage(Both, Info, " [ROTATION] Will rotate by %.3f°", angleDeg) + return angleDeg +} + +func detectScreenLayoutAreas(rotated gocv.Mat) (*ScreenLayout, float64, error) 
{ + // Correct order: Find center region FIRST (using all contours), THEN find digit displays + + startTime := time.Now() + + // Input is already thresholded binary image - work directly on it + // No grayscale, no threshold - just find contours + + // DEBUG: Save input to see what we're working with + if DEBUG_MODE { + gocv.IMWrite("test_output/layout_step1_input.png", rotated) + logMessage(LogFile, Debug, " [DEBUG] Saved layout_step1_input.png") + } + + // STEP 3: Apply erosion to separate nearby elements (5x5 kernel) - COMMENTED OUT + // step3Start := time.Now() + // kernel := gocv.GetStructuringElement(gocv.MorphRect, image.Pt(5, 5)) + // eroded := gocv.NewMat() + // gocv.Erode(thresh, &eroded, kernel) + // kernel.Close() + // defer eroded.Close() + // fmt.Printf(" [TIMING] Step 3 (Erosion): %dms\n", time.Since(step3Start).Milliseconds()) + + // STEP 2: Find contours - use RetrievalList to get ALL contours including nested + step2Start := time.Now() + contours := gocv.FindContours(rotated, gocv.RetrievalList, gocv.ChainApproxSimple) + defer contours.Close() + + logMessage(Both, Info, " Found %d total contours", contours.Size()) + if contours.Size() == 0 { + logMessage(Both, Error, " ERROR: No contours found in thresholded frame") + logMessage(Both, Error, " Frame dimensions: %dx%d", rotated.Cols(), rotated.Rows()) + // Save debug image + gocv.IMWrite("test_output/error_input.png", rotated) + logMessage(Both, Error, " Saved debug image to test_output/error_input.png") + return nil, 0, nil + } + + logMessage(LogFile, Debug, " [TIMING] Step 2 (Find contours): %dms", time.Since(step2Start).Milliseconds()) + logMessage(LogFile, Debug, " [TIMING] Step 2 (Total): %dms", time.Since(startTime).Milliseconds()) + + // STEP 1: Collect ALL boxes (no filter) and print details + var allBoxes []image.Rectangle + for i := 0; i < contours.Size(); i++ { + rect := gocv.BoundingRect(contours.At(i)) + allBoxes = append(allBoxes, rect) + // fmt.Printf(" Box %d: X=%d-%d (W=%d), 
Y=%d-%d (H=%d)\n", + // i, rect.Min.X, rect.Max.X, rect.Dx(), + // rect.Min.Y, rect.Max.Y, rect.Dy()) + } + + if len(allBoxes) == 0 { + logMessage(Both, Error, " ERROR: No bounding boxes found from contours") + return nil, 0, nil + } + + logMessage(LogFile, Debug, " Found %d boxes (all, no filter)", len(allBoxes)) + logMessage(LogFile, Debug, "") + + // DEBUG: Skip step 6a and 6b visualizations + // rawContoursVis := rotated.Clone() + // ... + // allBoxesVis := rotated.Clone() + // ... + + // STEP 2: Filter to significant boxes (width or height > 30px) for processing + var significantBoxes []image.Rectangle + for _, box := range allBoxes { + if box.Dx() > 30 || box.Dy() > 30 { + significantBoxes = append(significantBoxes, box) + } + } + + if len(significantBoxes) == 0 { + logMessage(Both, Error, " ERROR: No significant boxes found (>30px)") + logMessage(Both, Error, " All %d boxes were too small", len(allBoxes)) + return nil, 0, nil + } + + logMessage(LogFile, Debug, " Found %d significant boxes (>30px) for processing", len(significantBoxes)) + + // STEP 3: Calculate bounding box from ALL boxes (not just significant ones) + // This ensures bottom small numbers are included in the height calculation + minX := allBoxes[0].Min.X + minY := allBoxes[0].Min.Y + maxX := allBoxes[0].Max.X + maxY := allBoxes[0].Max.Y + + for _, box := range allBoxes { + if box.Min.X < minX { + minX = box.Min.X + } + if box.Min.Y < minY { + minY = box.Min.Y + } + if box.Max.X > maxX { + maxX = box.Max.X + } + if box.Max.Y > maxY { + maxY = box.Max.Y + } + } + + boundingBox := image.Rect(minX, minY, maxX, maxY) + boundingBoxWidth := boundingBox.Dx() + logMessage(LogFile, Debug, " Bounding box from all boxes: X=%d-%d, Y=%d-%d, Width=%d", minX, maxX, minY, maxY, boundingBoxWidth) + + // Calculate required scale to normalize to 860px width + const TARGET_WIDTH = 860 + scale := float64(TARGET_WIDTH) / float64(boundingBoxWidth) + logMessage(Both, Info, " šŸ“Š Bounding box width: %dpx, Scale 
factor: %.3f (target: %dpx)", boundingBoxWidth, scale, TARGET_WIDTH) + + // STEP 4: Scale the frame to 860px width + step4Start := time.Now() + newWidth := int(float64(rotated.Cols()) * scale) + newHeight := int(float64(rotated.Rows()) * scale) + scaled := gocv.NewMat() + gocv.Resize(rotated, &scaled, image.Pt(newWidth, newHeight), 0, 0, gocv.InterpolationLinear) + defer scaled.Close() + logMessage(LogFile, Debug, " [TIMING] Step 4 (Scale frame): %dms", time.Since(step4Start).Milliseconds()) + logMessage(LogFile, Debug, " Scaled frame: %dx%d -> %dx%d", rotated.Cols(), rotated.Rows(), newWidth, newHeight) + + // CRITICAL: Scale the bounding box coordinates to match the scaled frame + scaledMinX := int(float64(minX) * scale) + scaledMaxX := int(float64(maxX) * scale) + scaledMinY := int(float64(minY) * scale) + scaledMaxY := int(float64(maxY) * scale) + scaledBoundingBox := image.Rect(scaledMinX, scaledMinY, scaledMaxX, scaledMaxY) + logMessage(LogFile, Debug, " Scaled bounding box: X=%d-%d, Y=%d-%d (from original: X=%d-%d, Y=%d-%d)", + scaledMinX, scaledMaxX, scaledMinY, scaledMaxY, minX, maxX, minY, maxY) + + // Scale all significant boxes coordinates too + var scaledSignificantBoxes []image.Rectangle + for _, box := range significantBoxes { + scaledBox := image.Rect( + int(float64(box.Min.X)*scale), + int(float64(box.Min.Y)*scale), + int(float64(box.Max.X)*scale), + int(float64(box.Max.Y)*scale), + ) + scaledSignificantBoxes = append(scaledSignificantBoxes, scaledBox) + } + + // Scale all boxes too (for visualization) + var scaledAllBoxes []image.Rectangle + for _, box := range allBoxes { + scaledBox := image.Rect( + int(float64(box.Min.X)*scale), + int(float64(box.Min.Y)*scale), + int(float64(box.Max.X)*scale), + int(float64(box.Max.Y)*scale), + ) + scaledAllBoxes = append(scaledAllBoxes, scaledBox) + } + + // All subsequent processing now works on the SCALED frame with SCALED coordinates + // This ensures boxes are calculated in scaled coordinates + + // STEP 5: 
Find 50% line + height := scaledBoundingBox.Dy() + line50 := scaledBoundingBox.Min.Y + height/2 + logMessage(LogFile, Debug, " 50%% line at Y=%d", line50) + + // DEBUG: Save step 5 visualization - bounding box and 50% line + bboxVis := gocv.NewMat() + gocv.CvtColor(scaled, &bboxVis, gocv.ColorGrayToBGR) + // Draw bounding box in blue + gocv.Rectangle(&bboxVis, scaledBoundingBox, color.RGBA{0, 100, 255, 255}, 3) + // Draw 50% line in red + gocv.Line(&bboxVis, image.Pt(scaledMinX, line50), image.Pt(scaledMaxX, line50), color.RGBA{255, 0, 0, 255}, 3) + // Add labels + gocv.PutText(&bboxVis, fmt.Sprintf("BBox: %d-%d, %d-%d", scaledMinX, scaledMaxX, scaledMinY, scaledMaxY), + image.Pt(scaledMinX+10, scaledMinY+30), + gocv.FontHersheyPlain, 1.2, color.RGBA{0, 100, 255, 255}, 2) + gocv.PutText(&bboxVis, fmt.Sprintf("50%% line: Y=%d", line50), + image.Pt(scaledMinX+10, line50-10), + gocv.FontHersheyPlain, 1.2, color.RGBA{255, 0, 0, 255}, 2) + gocv.IMWrite("test_output/layout_step5_bbox_and_line.png", bboxVis) + bboxVis.Close() + logMessage(LogFile, Debug, " [DEBUG] Saved layout_step5_bbox_and_line.png") + + // DEBUG: Save step 6 - all boxes with numbers + allBoxesVis := gocv.NewMat() + gocv.CvtColor(scaled, &allBoxesVis, gocv.ColorGrayToBGR) + for i, box := range scaledAllBoxes { + gocv.Rectangle(&allBoxesVis, box, color.RGBA{0, 255, 0, 255}, 2) + // Add box number + gocv.PutText(&allBoxesVis, fmt.Sprintf("%d", i), + image.Pt(box.Min.X+5, box.Min.Y+15), + gocv.FontHersheyPlain, 1.2, color.RGBA{0, 255, 0, 255}, 2) + } + gocv.IMWrite("test_output/layout_step6_all_boxes.png", allBoxesVis) + allBoxesVis.Close() + logMessage(LogFile, Debug, " [DEBUG] Saved layout_step6_all_boxes.png") + + // STEP 6: Filter to boxes crossing 50% line (center region) + var centerBoxes []image.Rectangle + for _, box := range scaledSignificantBoxes { + if box.Min.Y < line50 && box.Max.Y > line50 { + centerBoxes = append(centerBoxes, box) + } + } + + if len(centerBoxes) == 0 { + logMessage(Both, 
Error, " ERROR: No boxes crossing 50%%%% line") + logMessage(Both, Error, " 50%%%% line at Y=%d, checked %d significant boxes", line50, len(scaledSignificantBoxes)) + return nil, 0, nil + } + + logMessage(LogFile, Debug, " Found %d boxes crossing 50%% line (center region)", len(centerBoxes)) + + // DEBUG: Save step 7 - center boxes visualization + centerBoxesVis := gocv.NewMat() + gocv.CvtColor(scaled, ¢erBoxesVis, gocv.ColorGrayToBGR) + // Draw 50% line + gocv.Line(¢erBoxesVis, image.Pt(scaledMinX, line50), image.Pt(scaledMaxX, line50), color.RGBA{255, 0, 0, 255}, 3) + // Draw center boxes + for _, box := range centerBoxes { + gocv.Rectangle(¢erBoxesVis, box, color.RGBA{0, 255, 255, 255}, 2) + } + gocv.IMWrite("test_output/layout_step7_center_boxes.png", centerBoxesVis) + centerBoxesVis.Close() + logMessage(LogFile, Debug, " [DEBUG] Saved layout_step7_center_boxes.png") + + // STEP 7: From center boxes, identify digit displays (height >= 110px) + var digitDisplays []image.Rectangle + for _, box := range centerBoxes { + logMessage(LogFile, Debug, " Center box: X=%d-%d (W=%d), Y=%d-%d (H=%d)", + box.Min.X, box.Max.X, box.Dx(), + box.Min.Y, box.Max.Y, box.Dy()) + if box.Dy() >= 110 { + digitDisplays = append(digitDisplays, box) + logMessage(LogFile, Debug, " āœ“ Added as digit display (H=%d >= 110)", box.Dy()) + } else { + logMessage(LogFile, Debug, " āœ— Skipped (H=%d < 110)", box.Dy()) + } + } + + if len(digitDisplays) == 0 { + logMessage(Both, Error, " ERROR: No digit displays found (height >= 110px) in center region") + logMessage(Both, Error, " Found %d center boxes, none met height requirement", len(centerBoxes)) + return nil, 0, nil + } + + logMessage(LogFile, Debug, " Found %d digit displays (height > 120px) BEFORE trimming", len(digitDisplays)) + + // DEBUG: Save step 8 - digit displays visualization + if DEBUG_MODE { + digitDisplaysVis := gocv.NewMat() + gocv.CvtColor(scaled, &digitDisplaysVis, gocv.ColorGrayToBGR) + for _, box := range digitDisplays { + 
gocv.Rectangle(&digitDisplaysVis, box, color.RGBA{255, 0, 255, 255}, 3) + } + gocv.IMWrite("test_output/layout_step8_digit_displays.png", digitDisplaysVis) + digitDisplaysVis.Close() + logMessage(LogFile, Debug, " [DEBUG] Saved layout_step8_digit_displays.png") + } + + // STEP 8: Get Y range from digit displays + minDigitY := digitDisplays[0].Min.Y + maxDigitY := digitDisplays[0].Max.Y + for _, box := range digitDisplays { + if box.Min.Y < minDigitY { + minDigitY = box.Min.Y + } + if box.Max.Y > maxDigitY { + maxDigitY = box.Max.Y + } + } + logMessage(LogFile, Debug, " Digit display Y range: %d-%d", minDigitY, maxDigitY) + + // STEP 9: Find ALL boxes (from scaledAllBoxes) within this Y range + var boxesInRange []image.Rectangle + for _, box := range scaledAllBoxes { + // Box overlaps with Y range if its bottom is below minDigitY and top is above maxDigitY + if box.Max.Y > minDigitY && box.Min.Y < maxDigitY { + boxesInRange = append(boxesInRange, box) + } + } + logMessage(LogFile, Debug, " Found %d boxes in Y range %d-%d", len(boxesInRange), minDigitY, maxDigitY) + + // DEBUG: Save step 9 - boxes in range visualization + boxesInRangeVis := gocv.NewMat() + gocv.CvtColor(scaled, &boxesInRangeVis, gocv.ColorGrayToBGR) + // Draw Y range lines + gocv.Line(&boxesInRangeVis, image.Pt(0, minDigitY), image.Pt(boxesInRangeVis.Cols(), minDigitY), color.RGBA{255, 0, 0, 255}, 2) + gocv.Line(&boxesInRangeVis, image.Pt(0, maxDigitY), image.Pt(boxesInRangeVis.Cols(), maxDigitY), color.RGBA{255, 0, 0, 255}, 2) + // Draw digit displays in magenta + for _, box := range digitDisplays { + gocv.Rectangle(&boxesInRangeVis, box, color.RGBA{255, 0, 255, 255}, 3) + } + // Draw all boxes in range in cyan + for _, box := range boxesInRange { + gocv.Rectangle(&boxesInRangeVis, box, color.RGBA{0, 255, 255, 255}, 1) + } + gocv.IMWrite("test_output/layout_step9_boxes_in_range.png", boxesInRangeVis) + boxesInRangeVis.Close() + logMessage(LogFile, Debug, " [DEBUG] Saved 
layout_step9_boxes_in_range.png") + + // STEP 10: Trim digit displays based on small boxes crossing their right edge + for i := range digitDisplays { + originalMaxX := digitDisplays[i].Max.X + newMaxX := originalMaxX + + // Check each box in range + for _, smallBox := range boxesInRange { + // Skip if this is the digit display itself + if smallBox == digitDisplays[i] { + continue + } + + // Check if small box crosses the right edge of this digit display + // Crosses if: smallBox.Min.X < digitDisplay.Max.X AND smallBox.Max.X > digitDisplay.Max.X + if smallBox.Min.X < digitDisplays[i].Max.X && smallBox.Max.X > digitDisplays[i].Max.X { + // Use the left edge of the small box as the new right edge + if smallBox.Min.X < newMaxX { + newMaxX = smallBox.Min.X + logMessage(LogFile, Debug, " Trimming digit display %d: right edge %d -> %d (small box at X=%d-%d)", + i, originalMaxX, newMaxX, smallBox.Min.X, smallBox.Max.X) + } + } + } + + digitDisplays[i].Max.X = newMaxX + } + + logMessage(LogFile, Debug, " Digit displays AFTER trimming: %d", len(digitDisplays)) + + // DEBUG: Save step 11 - trimmed digit displays visualization + trimmedVis := gocv.NewMat() + gocv.CvtColor(scaled, &trimmedVis, gocv.ColorGrayToBGR) + for _, box := range digitDisplays { + gocv.Rectangle(&trimmedVis, box, color.RGBA{0, 255, 0, 255}, 4) + } + gocv.IMWrite("test_output/layout_step11_trimmed_displays.png", trimmedVis) + trimmedVis.Close() + logMessage(LogFile, Debug, " [DEBUG] Saved layout_step11_trimmed_displays.png") + + if len(digitDisplays) < 2 { + // Save debug visualization showing what was found + debugVis := gocv.NewMat() + gocv.CvtColor(scaled, &debugVis, gocv.ColorGrayToBGR) + + // Draw 50% line + gocv.Line(&debugVis, image.Pt(scaledMinX, line50), image.Pt(scaledMaxX, line50), color.RGBA{255, 0, 0, 255}, 2) + gocv.PutText(&debugVis, fmt.Sprintf("50%% line: Y=%d", line50), + image.Pt(10, line50-10), gocv.FontHersheyPlain, 1.5, color.RGBA{255, 0, 0, 255}, 2) + + // Draw all center boxes with 
labels + for i, box := range centerBoxes { + boxColor := color.RGBA{0, 255, 255, 255} // Cyan for rejected + label := fmt.Sprintf("#%d: H=%d", i, box.Dy()) + + // Check if this box qualified as digit display + qualified := false + for _, dd := range digitDisplays { + if box == dd { + qualified = true + break + } + } + + if qualified { + boxColor = color.RGBA{0, 255, 0, 255} // Green for qualified + label += " OK" + } else if box.Dy() < 110 { + label += " TOO SHORT" + } + + gocv.Rectangle(&debugVis, box, boxColor, 3) + gocv.PutText(&debugVis, label, + image.Pt(box.Min.X+5, box.Min.Y+25), + gocv.FontHersheyPlain, 1.5, boxColor, 2) + } + + // Add summary at top + summary := fmt.Sprintf("Found: %d center boxes, %d digit displays (need 2)", + len(centerBoxes), len(digitDisplays)) + gocv.PutText(&debugVis, summary, + image.Pt(10, 30), gocv.FontHersheyDuplex, 1.0, color.RGBA{255, 255, 255, 255}, 2) + gocv.PutText(&debugVis, "Requirement: Height >= 110px", + image.Pt(10, 60), gocv.FontHersheyDuplex, 1.0, color.RGBA{255, 255, 255, 255}, 2) + + timestamp := time.Now().Format("20060102_150405") + errorFilename := fmt.Sprintf("test_output/layout_error_%s.png", timestamp) + gocv.IMWrite(errorFilename, debugVis) + debugVis.Close() + + // Enhanced error message + logMessage(Both, Error, " ERROR: Only %d digit display(s) found (need 2)", len(digitDisplays)) + logMessage(Both, Error, " ") + logMessage(Both, Error, " LIKELY CAUSE: Pulse oximeter is not centered under camera") + logMessage(Both, Error, " - Device may be at an angle or offset to one side") + logMessage(Both, Error, " - Only one display (SpO2 or HR) is in the center detection region") + logMessage(Both, Error, " ") + logMessage(Both, Error, " ACTION REQUIRED: Physically reposition camera or device") + logMessage(Both, Error, " - Move device to be centered and level under camera") + logMessage(Both, Error, " - Or adjust camera angle to capture both displays") + logMessage(Both, Error, " ") + logMessage(Both, Error, " 
Technical details:") + logMessage(Both, Error, " Center boxes found: %d (crossing 50%% line at Y=%d)", len(centerBoxes), line50) + for i, box := range centerBoxes { + qualified := "" + for _, dd := range digitDisplays { + if box == dd { + qualified = " āœ“ QUALIFIED" + break + } + } + if qualified == "" { + if box.Dy() < 110 { + qualified = " āœ— TOO SHORT (< 110px)" + } + } + logMessage(Both, Error, " Box #%d: W=%dpx, H=%dpx at X=%d-%d, Y=%d-%d%s", + i, box.Dx(), box.Dy(), box.Min.X, box.Max.X, box.Min.Y, box.Max.Y, qualified) + } + logMessage(Both, Error, " šŸ’¾ Debug image saved: %s", errorFilename) + logMessage(Both, Error, " Green boxes = qualified, Cyan = rejected") + return nil, 0, nil + } + + // STEP 11: Get Y range from digit displays + minCenterY := digitDisplays[0].Min.Y + maxCenterY := digitDisplays[0].Max.Y + + for _, box := range digitDisplays { + if box.Min.Y < minCenterY { + minCenterY = box.Min.Y + } + if box.Max.Y > maxCenterY { + maxCenterY = box.Max.Y + } + } + + // Create center region + centerRegion := image.Rect(scaledBoundingBox.Min.X, minCenterY, scaledBoundingBox.Max.X, maxCenterY) + + // STEP 12: Find X-center to split left/right + centerX := centerRegion.Min.X + centerRegion.Dx()/2 + logMessage(LogFile, Debug, " Center X: %d", centerX) + + // STEP 13: Find rightmost X and Y range in each half + spo2RightX := -1 + spo2MinY := 10000 + spo2MaxY := 0 + hrRightX := -1 + hrMinY := 10000 + hrMaxY := 0 + + for _, box := range digitDisplays { + // Determine which half this box belongs to based on its center + boxCenterX := box.Min.X + box.Dx()/2 + + if boxCenterX < centerX { + // Left half (SpO2) + if box.Max.X > spo2RightX { + spo2RightX = box.Max.X + } + if box.Min.Y < spo2MinY { + spo2MinY = box.Min.Y + } + if box.Max.Y > spo2MaxY { + spo2MaxY = box.Max.Y + } + } else { + // Right half (HR) + if box.Max.X > hrRightX { + hrRightX = box.Max.X + } + if box.Min.Y < hrMinY { + hrMinY = box.Min.Y + } + if box.Max.Y > hrMaxY { + hrMaxY = box.Max.Y + 
} + } + } + + if spo2RightX == -1 || hrRightX == -1 { + logMessage(Both, Error, " ERROR: Could not find displays in both halves") + logMessage(Both, Error, " Left half (SpO2): rightX=%d, Right half (HR): rightX=%d", spo2RightX, hrRightX) + return nil, 0, nil + } + + // STEP 14: Create boxes with fixed CUT_WIDTH + spo2LeftX := spo2RightX - CUT_WIDTH + spo2Box := image.Rect(spo2LeftX, spo2MinY, spo2RightX, spo2MaxY) + + hrLeftX := hrRightX - CUT_WIDTH + hrBox := image.Rect(hrLeftX, hrMinY, hrRightX, hrMaxY) + + logMessage(LogFile, Debug, " Final SpO2 box: X=%d-%d, Y=%d-%d", spo2Box.Min.X, spo2Box.Max.X, spo2Box.Min.Y, spo2Box.Max.Y) + logMessage(LogFile, Debug, " Final HR box: X=%d-%d, Y=%d-%d", hrBox.Min.X, hrBox.Max.X, hrBox.Min.Y, hrBox.Max.Y) + + // DEBUG: Save step 15 - final boxes visualization + finalVis := gocv.NewMat() + gocv.CvtColor(scaled, &finalVis, gocv.ColorGrayToBGR) + gocv.Rectangle(&finalVis, spo2Box, color.RGBA{255, 0, 0, 255}, 4) + gocv.Rectangle(&finalVis, hrBox, color.RGBA{0, 255, 255, 255}, 4) + gocv.PutText(&finalVis, "SpO2", image.Pt(spo2Box.Min.X, spo2Box.Min.Y-10), + gocv.FontHersheyDuplex, 1.2, color.RGBA{255, 0, 0, 255}, 2) + gocv.PutText(&finalVis, "HR", image.Pt(hrBox.Min.X, hrBox.Min.Y-10), + gocv.FontHersheyDuplex, 1.2, color.RGBA{0, 255, 255, 255}, 2) + gocv.IMWrite("test_output/layout_step15_final_boxes.png", finalVis) + finalVis.Close() + logMessage(LogFile, Debug, " [DEBUG] Saved layout_step15_final_boxes.png") + + logMessage(LogFile, Debug, " [TIMING] Total layout detection: %dms", time.Since(startTime).Milliseconds()) + + // Return layout and scale factor + return &ScreenLayout{ + SpO2Area: spo2Box, + HRArea: hrBox, + }, scale, nil +} + +// Old functions kept for backward compatibility but deprecated +func groupAdjacentContours(rects []image.Rectangle) []Display { + var displays []Display + used := make([]bool, len(rects)) + + logMessage(LogFile, Debug, " Grouping %d candidate contours:", len(rects)) + for i := 0; i < 
len(rects); i++ { + if used[i] { + continue + } + + // Look for an adjacent rect with similar Y coordinate + var adjacent *int + for j := i + 1; j < len(rects); j++ { + if used[j] { + continue + } + + // Check if Y coordinates are similar (within 20 pixels) + yDiff := rects[i].Min.Y - rects[j].Min.Y + if yDiff < 0 { + yDiff = -yDiff + } + if yDiff > 20 { + logMessage(LogFile, Debug, " Skip pairing rect[%d] and rect[%d]: Y diff=%d > 20", i, j, yDiff) + continue + } + + // Check if they're side-by-side (allow small overlap or gap) + xGap := rects[j].Min.X - rects[i].Max.X + // Allow up to 50 pixels of overlap (negative gap) or up to 200 pixels of space + if xGap >= -50 && xGap < 200 { + logMessage(LogFile, Debug, " Pairing rect[%d] and rect[%d]: Y diff=%d, X gap=%d", i, j, yDiff, xGap) + adjacent = &j + break + } else { + logMessage(LogFile, Debug, " Skip pairing rect[%d] and rect[%d]: X gap=%d not in range [-50, 200)", i, j, xGap) + } + } + + if adjacent != nil { + // Two separate contours - use them as left and right + used[i] = true + used[*adjacent] = true + + left := rects[i] + right := rects[*adjacent] + + // Ensure left is actually on the left + if left.Min.X > right.Min.X { + left, right = right, left + } + + logMessage(LogFile, Debug, " DUAL: [%dx%d @(%d,%d)] + [%dx%d @(%d,%d)]", + left.Dx(), left.Dy(), left.Min.X, left.Min.Y, + right.Dx(), right.Dy(), right.Min.X, right.Min.Y) + + displays = append(displays, Display{ + IsSplit: false, + LeftRect: left, + RightRect: right, + }) + } else { + // Single contour - will be split in half + used[i] = true + + logMessage(LogFile, Debug, " SPLIT: [%dx%d @(%d,%d)]", + rects[i].Dx(), rects[i].Dy(), rects[i].Min.X, rects[i].Min.Y) + + displays = append(displays, Display{ + IsSplit: true, + FullRect: rects[i], + }) + } + } + + return displays +} + +func detectScreenLayout(rotated gocv.Mat) ([]Display, error) { + gray := gocv.NewMat() + gocv.CvtColor(rotated, &gray, gocv.ColorBGRToGray) + thresh := gocv.NewMat() + 
gocv.Threshold(gray, &thresh, 170, 255, gocv.ThresholdBinary) + gray.Close() + contours := gocv.FindContours(thresh, gocv.RetrievalExternal, gocv.ChainApproxSimple) + thresh.Close() + + var rects []image.Rectangle + for i := 0; i < contours.Size(); i++ { + rect := gocv.BoundingRect(contours.At(i)) + if rect.Dy() > MIN_BOX_HEIGHT && rect.Dy() < 200 { + rects = append(rects, rect) + } + } + contours.Close() + + sort.Slice(rects, func(i, j int) bool { + return rects[i].Dy() > rects[j].Dy() + }) + + if len(rects) < 3 { + return nil, fmt.Errorf("found only %d contours (need 3+)", len(rects)) + } + + // Take up to 6 candidates to ensure we capture split digits + maxCands := 6 + if len(rects) < maxCands+1 { + maxCands = len(rects) - 1 + } + cands := rects[1 : 1+maxCands] + + // Sort by Y position for processing (Y desc, then X asc) + sort.Slice(cands, func(i, j int) bool { + if cands[i].Min.Y != cands[j].Min.Y { + return cands[i].Min.Y > cands[j].Min.Y + } + return cands[i].Min.X < cands[j].Min.X + }) + + logMessage(LogFile, Debug, " Selected %d candidates (excluding largest):", len(cands)) + for idx, rect := range cands { + logMessage(LogFile, Debug, " [%d]: %dx%d @(%d,%d)", idx, rect.Dx(), rect.Dy(), rect.Min.X, rect.Min.Y) + } + + // Group adjacent contours + displays := groupAdjacentContours(cands) + + logMessage(LogFile, Debug, " Found %d displays after grouping", len(displays)) + + // Group displays by Y position to find the digit row + // Displays within 30 pixels vertically are considered same row + type YGroup struct { + avgY int + displays []Display + } + var yGroups []YGroup + + for _, disp := range displays { + dispY := disp.FullRect.Min.Y + if !disp.IsSplit { + dispY = disp.LeftRect.Min.Y + } + + // Find existing group with similar Y + found := false + for i := range yGroups { + if math.Abs(float64(dispY-yGroups[i].avgY)) < 30 { + yGroups[i].displays = append(yGroups[i].displays, disp) + found = true + break + } + } + if !found { + yGroups = append(yGroups, 
YGroup{avgY: dispY, displays: []Display{disp}}) + } + } + + logMessage(LogFile, Debug, " Grouped into %d Y-bands:", len(yGroups)) + for idx, group := range yGroups { + logMessage(LogFile, Debug, " Band %d (Y~%d): %d displays", idx, group.avgY, len(group.displays)) + } + + // Find the band with exactly 2 displays (SpO2 and HR) + var digitDisplays []Display + for _, group := range yGroups { + if len(group.displays) == 2 { + digitDisplays = group.displays + break + } + } + + if len(digitDisplays) != 2 { + return nil, fmt.Errorf("could not find Y-band with exactly 2 displays (found %d bands)", len(yGroups)) + } + + // Sort by X position: left = SpO2, right = HR + sort.Slice(digitDisplays, func(i, j int) bool { + xI := digitDisplays[i].FullRect.Min.X + if !digitDisplays[i].IsSplit { + xI = digitDisplays[i].LeftRect.Min.X + } + xJ := digitDisplays[j].FullRect.Min.X + if !digitDisplays[j].IsSplit { + xJ = digitDisplays[j].LeftRect.Min.X + } + return xI < xJ + }) + + return digitDisplays, nil +} + +func saveLayoutVisualization(rotated gocv.Mat, layout *ScreenLayout, filename string) { + visualization := rotated.Clone() + + // Draw SpO2 box in red + red := color.RGBA{255, 0, 0, 255} + gocv.Rectangle(&visualization, layout.SpO2Area, red, 3) + gocv.PutText(&visualization, "SpO2", image.Pt(layout.SpO2Area.Min.X, layout.SpO2Area.Min.Y-10), + gocv.FontHersheyDuplex, 1.2, red, 2) + + // Draw HR box in cyan + cyan := color.RGBA{0, 255, 255, 255} + gocv.Rectangle(&visualization, layout.HRArea, cyan, 3) + gocv.PutText(&visualization, "HR", image.Pt(layout.HRArea.Min.X, layout.HRArea.Min.Y-10), + gocv.FontHersheyDuplex, 1.2, cyan, 2) + + gocv.IMWrite(filename, visualization) + visualization.Close() +} diff --git a/backups/backup_20251122/test_rotation.go b/backups/backup_20251122/test_rotation.go new file mode 100644 index 0000000..168a4b3 --- /dev/null +++ b/backups/backup_20251122/test_rotation.go @@ -0,0 +1,202 @@ +package main + +import ( + "fmt" + "log" + "os" + + 
"gocv.io/x/gocv" +) + +// Global DEBUG flag +var DEBUG = false + +func main() { + // Parse command line args + if len(os.Args) > 1 && os.Args[1] == "/debug" { + DEBUG = true + fmt.Println("DEBUG mode enabled\n") + } + + // Hardcode RTSP URL + streamURL := "rtsp://tapohass:!!Helder06@192.168.2.183:554/stream1" + + fmt.Println("Rotation Detection System") + fmt.Println("========================") + fmt.Println("Processing frames...\n") + + // Open RTSP stream + stream, err := gocv.OpenVideoCapture(streamURL) + if err != nil { + log.Fatalf("Failed to open stream: %v", err) + } + defer stream.Close() + + // Initialize detection state + rotation := 9999.0 // Sentinel value for "not detected" + width := 0 // 0 means "not detected" + + // Frame processing counters + framesSkipped := 0 + framesProcessed := 0 + detectionAttempts := 0 + totalFramesRead := 0 + SKIP_COUNT := 7 // Process every 8th frame + MAX_DETECTION_ATTEMPTS := 100 // Give up after 100 attempts + RE_DETECT_AFTER := 0 // Re-detect after N frames (0 = disabled) + + // Main processing loop + frame := gocv.NewMat() + defer frame.Close() + + for { + // Acquire a frame + if ok := stream.Read(&frame); !ok || frame.Empty() { + log.Fatal("Failed to read frame from stream") + } + totalFramesRead++ + + // Preprocess frame to binary + binary := PreprocessFrame(frame) + + // If rotation is 9999 or width is 0, detect bands/width/rotation + if rotation == 9999 || width == 0 { + detectionAttempts++ + + // Give up if too many attempts + if detectionAttempts > MAX_DETECTION_ATTEMPTS { + fmt.Printf("āŒ Detection failed after %d attempts. 
Exiting.\n", MAX_DETECTION_ATTEMPTS) + break + } + + result := DetectRotationAndWidth(binary) + if result.Success { + rotation = result.Rotation + width = result.Width + fmt.Printf("āœ“ DETECTION SUCCESSFUL (frame %d, attempt #%d): Width=%dpx, Rotation=%.3f°\n\n", + totalFramesRead, detectionAttempts, width, rotation) + detectionAttempts = 0 // Reset counter + } else { + // Show detection attempts with frame info + if detectionAttempts <= 5 { + if DEBUG { + fmt.Printf("Frame %d: Detection attempt #%d failed\n", totalFramesRead, detectionAttempts) + } + } else { + fmt.Printf("⚠ Frame %d: Detection attempt #%d failed - retrying...\n", totalFramesRead, detectionAttempts) + } + // Clean up and continue to next frame + binary.Close() + + // Skip a few frames to ensure display has changed + for skip := 0; skip < 3; skip++ { + if ok := stream.Read(&frame); !ok { + break + } + } + continue + } + } + + // If rotation != 9999 and width != 0 and we've skipped enough frames + if rotation != 9999 && width != 0 && framesSkipped >= SKIP_COUNT { + framesProcessed++ + + // Rotate the image to correct orientation + rotated := RotateImage(binary, rotation) + defer rotated.Close() + + // Calculate scale factor based on detected width + scaleFactor := 860.0 / float64(width) + + // Scale to 860px width using the scale factor + scaled := ScaleByFactor(rotated, scaleFactor) + defer scaled.Close() + + fmt.Printf("Frame #%d processed: Rotated %.3f°, Scaled from %dpx to 860px (factor: %.3f)\n", + framesProcessed, rotation, width, scaleFactor) + + // Re-detect bands on the scaled image + // Scale the 80px minimum height by the same factor + scaledMinHeight := int(80.0 * scaleFactor) + bands := DetectBands(scaled, scaledMinHeight) + + if DEBUG { + fmt.Printf(" Using scaled minimum height: %dpx (80px * %.3f)\n", scaledMinHeight, scaleFactor) + fmt.Printf(" Found %d bands on scaled image:\n", len(bands)) + for i, band := range bands { + fmt.Printf(" Band #%d: Y range [%d-%d], Height: %dpx\n", + 
i+1, band.minY, band.maxY, band.maxY-band.minY) + } + } + + // We should have exactly 2 bands: graph and digits + if len(bands) == 2 { + // First band should be graph (taller), second should be digits + graphBand := bands[0] + digitBand := bands[1] + + // Verify graph band is taller + graphHeight := graphBand.maxY - graphBand.minY + digitHeight := digitBand.maxY - digitBand.minY + + if graphHeight < digitHeight { + // Swap if needed + graphBand, digitBand = digitBand, graphBand + } + + fmt.Printf(" āœ“ Graph band: Y [%d-%d], Height: %dpx\n", + graphBand.minY, graphBand.maxY, graphBand.maxY-graphBand.minY) + fmt.Printf(" āœ“ Digit band: Y [%d-%d], Height: %dpx\n", + digitBand.minY, digitBand.maxY, digitBand.maxY-digitBand.minY) + + // Extract just the digit band region for OCR + digitRegion := ExtractBandRegion(scaled, digitBand) + defer digitRegion.Close() + + // Save visualization if in DEBUG mode + if DEBUG && framesProcessed <= 5 { + // Simple visualization with just 2 bands + VisualizeSimpleBands(scaled, graphBand, digitBand, + fmt.Sprintf("debug_frame_%03d_bands.png", framesProcessed)) + + // Save just the digit region + digitFilename := fmt.Sprintf("debug_frame_%03d_digits.png", framesProcessed) + gocv.IMWrite(digitFilename, digitRegion) + fmt.Printf("DEBUG: Saved digit region to %s\n", digitFilename) + } + + // TODO: Run OCR on digitRegion here + + } else { + fmt.Printf(" ⚠ Expected 2 bands, found %d bands (>%dpx height)\n", len(bands), scaledMinHeight) + } + + // Save debug images if in DEBUG mode + if DEBUG && framesProcessed <= 5 { + filename := fmt.Sprintf("debug_frame_%03d_scaled.png", framesProcessed) + gocv.IMWrite(filename, scaled) + fmt.Printf("DEBUG: Saved %s\n", filename) + } + + // TODO: Add OCR processing on 'scaled' image here in later phase + + framesSkipped = 0 + + // Optional: Force re-detection after N frames to adapt to changes + if RE_DETECT_AFTER > 0 && framesProcessed >= RE_DETECT_AFTER { + fmt.Printf("\nšŸ”„ Re-detecting after %d 
frames...\n", RE_DETECT_AFTER) + rotation = 9999 + width = 0 + framesProcessed = 0 + } + } else { + framesSkipped++ + } + + // Clean up + binary.Close() + } + + fmt.Println("\nProgram terminated.") +} diff --git a/backups/backup_20251125/PROJECT_STATE.md b/backups/backup_20251125/PROJECT_STATE.md new file mode 100644 index 0000000..e8071e0 --- /dev/null +++ b/backups/backup_20251125/PROJECT_STATE.md @@ -0,0 +1,1544 @@ +# PulseOx Monitor - Project State & Technical Documentation + +**Current Version:** v3.57 (True Raw Frame Saving) +**Last Updated:** November 17, 2025 +**Status:** Production - threshold after preprocessing, timestamp stays colored + +āš ļø **IMPORTANT:** Update VERSION constant in pulseox-monitor.go and this document with every build! + +**For AI Assistant:** You MUST increment the version number in both pulseox-monitor.go and this document whenever you make ANY code changes. This is not optional - it's a critical part of the workflow. + +--- + +## šŸ”§ For New Chat Sessions: Data Access + +**You have full filesystem access to `/Users/johanjongsma/pulse-monitor/`** + +Use these tools to work with the project: +- `Filesystem:read_file` - Read source code, logs, config files +- `Filesystem:write_file` - Create new files +- `Filesystem:edit_file` - Modify existing code (preferred for edits) +- `Filesystem:list_directory` - See what files exist +- `Filesystem:move_file` - Move/rename files (e.g., approving training digits) +- `Filesystem:search_files` - Find files by pattern + +**Key files you'll need:** +- Source: `pulseox-monitor.go`, `ocr.go`, `layout_detection.go`, etc. 
+- Config: `config.yaml` +- Training: `training_digits/*.png`, `training_digits/invalid/*.png` +- Output: `review/*.png`, `review/review.html` +- Logs: `pulseox-monitor_*.log` + +**DO NOT waste time asking how to access files - just use the filesystem tools directly!** + +--- + +## Project Overview + +This system monitors a Masimo Rad-G pulse oximeter by reading its display via RTSP camera feed. It uses computer vision (OpenCV/gocv) to perform OCR on the SpO2 and heart rate values, then posts readings to Home Assistant for tracking and alerting. + +### Why This Exists +The Masimo pulse oximeter doesn't have a data output port, so we use a camera pointed at its display. The display has significant overexposure and light leakage issues, making traditional OCR (Tesseract) unreliable. We developed a custom template-matching approach that handles these challenging conditions. + +--- + +## Architecture + +### Main Components + +1. **pulseox-monitor.go** - Main application (v3.10) + - Unified frame processing loop + - Interruptible sleep for graceful shutdown + - Smart escalation strategy + - Signal handling (Ctrl+C) + +2. **processor.go** - Frame processor + - OCR execution + - Result validation + - Home Assistant posting + +3. **validators.go** - Validation handlers + - Corruption detection + - Confidence checking + - Stability validation (hindsight) + +4. **ocr.go** - Recognition engine + - Template matching for digits 0-9 + - Invalid pattern detection (corruption) + - `hasOneAt()` function for detecting narrow '1' digits + - Dynamic width calculation for 2-digit vs 3-digit displays + +5. **layout_detection.go** - Display locator + - Finds SpO2 and HR display areas in frame + - Position-based detection (rightmost edge method) + - Handles digit fragmentation + +6. **normalize.go** - Frame normalization + - Screen width detection + - Scaling to 860px width + - Layout detection with normalization + +7. 
**frame_source.go** - Frame sources + - RTSP streaming source + - Single-file test source + - Interruptible via `IsActive()` method + +8. **types.go** - Shared data structures + - ProcessingState with ConsecutiveFailures counter + - Reading, ProcessingResult types + - Status and Action enums + +9. **homeassistant.go** - Data posting + - REST API integration + - Posts only when values change + - Separate sensors: `sensor.pulse_ox_spo2`, `sensor.pulse_ox_hr` + +10. **html_report.go** - Review interface + - Dark-themed HTML generated on shutdown + - Shows all processed frames with confidence scores + - Visual indicators for unstable readings + +11. **timestamp_ocr.go** - Timestamp validation + - Extracts camera timestamp from frame + - Uses Tesseract OCR + - Validates against server time + - Warns if lag >10 seconds + +### Data Flow + +``` +RTSP Stream / File → Frame Capture (every 4th frame) → +Timestamp validation (every 10th frame) on **COLORED** frame → +**Clone frame for error saving** (grayscale → threshold at 240 → save as rawThresholded) → +Preprocessing (crop 68px top, rotate 90° CW) on **COLORED** frame → +**THRESHOLD ONCE at 240** (grayscale → binary) AFTER preprocessing → +Layout detection (find contours on binary) → +Display area extraction (280x200px binary regions) → +Template matching OCR on binary → +Digit validation (8 & 0) → +Exception handling (saves rawThresholded on errors) → +Home Assistant posting +``` + +--- + +## Recognition System + +### Template Matching Approach + +We use a library of digit images extracted from the actual pulse oximeter display. Each digit (0-9) can have multiple template variants to handle slight variations. 
+ +**Current templates:** +- Digits 0-9: Multiple variants per digit +- Invalid patterns: 2 templates in `training_digits/invalid/` + - `white_blob_1.png` + - `invalid_2.png` + +**Template location:** `/Users/johanjongsma/pulse-monitor/training_digits/` + +### Digit Width Detection + +The system handles both 2-digit (XX) and 3-digit (1XX) displays dynamically: + +- **Narrow '1' detection:** `hasOneAt(x)` checks if column X is 80%+ white and column X+10 is 80%+ black +- **Check positions:** + - Digit 3 (rightmost): `w-8` pixels from right edge + - Digit 2: Depends on digit 3 width (72px for '1', 100px otherwise) + - Digit 1: Depends on both digit 2 and 3 widths + +**Why -8 offset for digit 3?** +Originally used -5, but testing showed '1' digits needed detection slightly more to the left to catch the right edge reliably. + +### Display Positioning + +- **SpO2 and HR displays:** 234 pixels apart vertically +- **Layout detection:** Run once at startup, locked unless re-detection triggered +- **Frame processing:** Every 4th frame (~3.75 fps from 15fps stream) +- **Timestamp check:** Every 10th processed frame (~2.5 seconds) + +--- + +## Exception Handling System + +The system has three independent exception handlers that run in sequence: + +### 1. Confirmed Corruption (Priority: Highest) +**Trigger:** Any digit recognized as `-1` (matched invalid template with >70% confidence) + +**Behavior:** +- Log to file (silent to console) +- Skip corrupted frame +- Continue to next frame through normal processing +- No special retry, no layout re-detection + +**Why this approach:** +Corruption is a recognition failure, not a camera/lighting issue. The next frame will naturally be clean. + +**Invalid templates:** +Stored in `training_digits/invalid/` - patterns like white blobs, UI elements, or corrupted digits that should never be interpreted as valid numbers. + +### 2. 
Low OCR Confidence +**Trigger:** Average confidence < 85% for SpO2 or HR, OR negative values (unrecognized digits) + +**Behavior:** +1. **First occurrence:** + - Log "āš ļø Low confidence, retrying with next frame..." or "[UNRECOGNIZED] SpO2=X, HR=Y" + - Read next frame immediately + - Re-detect layout (handles camera movement/vibration) + - Process retry frame + - If successful (high confidence + values changed): perform stability check and post + +2. **Second consecutive low confidence:** + - Log "āš ļø Low confidence after retry, pausing 2 seconds" + - Give up, wait 2 seconds before continuing + +**Why re-detect layout:** +Low confidence or unrecognized digits often indicate camera movement, lighting changes, focus issues, or temporary blur. Re-detecting layout helps recover from these transient problems. + +### 3. Large Delta (Hindsight Validation) +**Trigger:** Values changed >3 points from last stable reading (e.g., SpO2: 95→92 or HR: 70→75) + +**Behavior:** +1. Hold new reading as "pending" (don't post yet) +2. Store baseline (value before the spike) +3. Wait for next reading +4. **If next reading confirms trend** (moves in same direction): + - Post the held reading + - Post the confirming reading +5. **If next reading contradicts** (moves opposite direction or returns to baseline): + - Discard held reading as glitch + - Post the new reading + +**Why hindsight validation:** +Physiological changes are gradual. Sudden spikes >3 points are usually recognition glitches or transient display artifacts. This prevents false alarms while allowing real trends. + +**Example scenario:** +``` +Baseline: SpO2=95 +Reading 1: SpO2=91 (Ī”4) → HOLD, don't post +Reading 2: SpO2=89 (Ī”2 from held) → Trend confirmed, post 91 then 89 +``` + +vs. + +``` +Baseline: SpO2=95 +Reading 1: SpO2=91 (Ī”4) → HOLD, don't post +Reading 2: SpO2=95 (back to baseline) → Discard 91 as glitch, post 95 +``` + +--- + +## Key Design Decisions + +### 1. 
Change-Based Posting Only +**Decision:** Only post when SpO2 or HR values actually change. + +**Rationale:** +- Reduces Home Assistant database bloat +- Avoids unnecessary network traffic +- Still captures all meaningful changes + +### 2. Template Matching Over Tesseract OCR +**Decision:** Use custom template matching instead of Tesseract. + +**Rationale:** +- Pulse oximeter display has severe overexposure +- Tesseract unreliable with bright backgrounds +- Template matching: 90%+ accuracy vs. Tesseract: ~60% +- We have stable digit shapes - perfect for template matching + +### 3. Fixed Layout After Startup +**Decision:** Detect layout once, lock it, only re-detect on low confidence retry. + +**Rationale:** +- Camera is stationary (mounted on tripod) +- Display position doesn't change +- Layout detection is expensive (~10-20ms) +- Re-detection only needed if camera moves/vibrates + +### 4. No Immediate Corruption Retry +**Decision:** Don't immediately retry after detecting corruption; just skip to next frame. + +**Rationale:** +- Corruption is a recognition issue, not camera issue +- Next frame (133ms later at 15fps) will naturally be different +- Avoids complexity of retry-within-retry scenarios +- Simpler code, same reliability + +### 5. Processing Every 4th Frame +**Decision:** Process every 4th frame (~3.75 fps from 15fps stream). + +**Rationale:** +- Balance between responsiveness and CPU usage +- SpO2/HR don't change faster than ~1 reading/second physiologically +- Allows time for processing, posting, and stability checks +- Adjusted for 15fps camera (was 6th frame for 30fps) +- Can be adjusted: 3rd frame = ~5fps, 5th frame = ~3fps + +### 6. Dark Theme HTML Review +**Decision:** Dark background with light text in review HTML. + +**Rationale:** +- Easier on eyes during long review sessions +- Better contrast for confidence scores +- Color-coded indicators more visible + +### 7. No FPS Display +**Decision:** Don't display stream FPS statistics. 
+ +**Rationale:** +- Stream FPS is constant (30fps from camera) +- FPS logging adds noise to console output +- Processing rate (every 6th frame) is more relevant than raw stream FPS +- Keeps console focused on actual readings and warnings + +### 8. Digit Validation for '8' and '0' +**Decision:** Validate recognized '8' and '0' digits by checking for characteristic holes. + +**Rationale:** +- Template matching can falsely recognize corrupted/partial digits as '8' or '0' +- '8' validation: checks for TWO holes (at 30% and 70% Y, scanning 40-50% X) +- '0' validation: checks for ONE hole (at 50% Y, scanning 30-70% X) +- Wider scan range for '0' (30-70%) than '8' (40-50%) because '0' hole is larger +- If validation fails, digit marked as -1 (invalid) → triggers retry/re-detection +- Prevents false alarms from misrecognized digits +- Simple pixel-based validation is fast and reliable + +### 9. Alarm Mode Decolorization +**Decision:** Pre-process colored alarm backgrounds before OCR to normalize them to black/white. + +**Rationale:** +- Pulse oximeter displays colored backgrounds when in alarm mode (low SpO2 or abnormal HR) +- SpO2 alarm: Yellow/orange background, HR alarm: Cyan/blue background +- Normal thresholding fails with colored backgrounds +- Decolorization converts: white digits (RGB all > 240) → white (255), everything else → black (0) +- **Applied ONLY to display regions AFTER extraction** (in `recognizeDisplayArea()` function) +- **Layout detection works on original colored frames** - grayscale conversion handles colors fine for contour detection +- **Decolorization timing**: + 1. Layout detection: Original colored frame → grayscale → threshold → find contours (works fine) + 2. Extract display regions: Still colored (280x200px = 56K pixels) + 3. Decolorize extracted regions: Only ~100K pixels total (both displays) + 4. 
Continue with OCR: grayscale → threshold → template matching +- Each display region is ~280x200px = 56K pixels +- Decolorizing two display regions (~100K pixels) takes <10ms +- Threshold of 240 ensures only pure white pixels (digits/borders) are preserved, not grays +- No performance impact - simple pixel-wise operation on small regions +- Enables reliable digit recognition regardless of alarm state +- Critical for 24/7 monitoring where alarms are expected + +--- + +## Environmental Considerations + +### Light Leakage Issues +**Problem:** Ambient light creates false contours and affects brightness detection. + +**Solutions implemented:** +- Increased brightness threshold to 170 (from lower values) +- More selective contour filtering +- Height filters to exclude oversized boxes (<200px) + +**Manual mitigation:** +Position towel/cloth to block light from reflecting off display surface. + +### Camera Positioning +**Critical:** Camera must be positioned to minimize glare and ensure consistent framing. 
+ +**Current setup:** +- Tripod-mounted camera +- RTSP stream from network camera +- Fixed zoom/focus (no autofocus) + +--- + +## Files and Directories + +### Source Files +- `pulseox-monitor.go` - Main application +- `processor.go` - Frame processor +- `validators.go` - Validation handlers +- `ocr.go` - Recognition engine +- `layout_detection.go` - Display finder +- `normalize.go` - Frame normalization +- `frame_source.go` - Frame sources (RTSP/file) +- `types.go` - Shared data structures +- `helpers.go` - Utility functions (logf) +- `homeassistant.go` - API integration +- `html_report.go` - Review page generator +- `timestamp_ocr.go` - Timestamp validation +- `config.go` - Configuration loading + +### Configuration +- `config.yaml` - Camera URL and Home Assistant settings + +### Training Data +- `training_digits/*.png` - Valid digit templates (0-9) +- `training_digits/invalid/*.png` - Corruption patterns + +### Output +- `review/` - Frame captures and digit images (cleaned each run) + - `f{N}_boxes.jpg` - Visualization of detected areas + - `f{N}_{display}_checks.png` - Digit detection analysis + - `f{N}_{display}_digit[1-3].png` - Individual digit crops + - `f{N}_{display}_full.png` - Full display with recognized value + - `review.html` - Live-updated HTML (append-only) +- `raw_frames/` - Raw processed frames for failed recognitions (cleaned each run) + - `raw_YYYYMMDD-NNNNN.png` - **TRUE raw frames** (unprocessed, portrait orientation, with timestamp) + - Only saved when recognition fails (corruption, low confidence, unrecognized) + - Used for debugging and training data collection + - **Can be tested directly**: `./pulseox-monitor raw_frames/raw_*.png` (will go through full preprocessing pipeline) +- `test_output/` - Debug visualizations (cleaned each run) + - Layout detection debug files + - Corruption debug snapshots with timestamps + +### Logs +- `pulseox-monitor_YYYYMMDD_HHMMSS.log` - Detailed execution log + - Console shows only: value changes, warnings, 
errors + - File logs: all frames, timing, confidence scores, decisions + +--- + +## Development Workflow + +### Version Management + +**CRITICAL: Update version with every build!** + +**For AI Assistant (Claude):** +- ALWAYS increment version number when making ANY code changes +- Update BOTH pulseox-monitor.go and PROJECT_STATE.md +- Add entry to Version History section documenting what changed +- This is MANDATORY, not optional + +**Version increment process:** + +1. **When making code changes:** + - Open `pulseox-monitor.go` + - Update `const VERSION = "v3.XX"` (increment version number) + - Document what changed in version history section below + +2. **After significant changes:** + - Update this PROJECT_STATE.md document + - Keep architecture, design decisions, and troubleshooting current + - Update "Last Verified Working" date at bottom + +3. **Commit discipline:** + - Version bump = code change + - Every feature/fix gets a version increment + - Keep PROJECT_STATE.md in sync with code + +### Adding New Training Digits + +When you find a good digit image in the review files: + +```bash +# Example: Approve f768_hr_digit3.png as digit "1" +# Creates training_digits/1_2.png (next available variant) +``` + +Current naming: `{digit}_{variant}.png` (e.g., `7_1.png`, `7_2.png`) + +### Adding Invalid Patterns + +When you find corrupted/glitchy images: + +```bash +# Move to invalid folder with descriptive name +# Creates training_digits/invalid/invalid_X.png +``` + +### Testing Changes + +1. Edit code on Mac +2. Compile: `./build.sh` +3. Run: `./pulseox-monitor` +4. Monitor console for value changes and warnings +5. Check log file for detailed diagnostics +6. 
Review HTML on shutdown (Ctrl+C) for visual verification + +### Performance Tuning + +**Frame rate:** +- Time-based since v3.42: frames processed at a minimum 250ms interval (~4 fps) +- Located in frame_source.go (`NewRTSPSource()`; the old `skipCount` parameter was removed) +- Adjust the interval: 200ms=~5fps, 250ms=~4fps, 333ms=~3fps + +**Confidence threshold:** +- Currently 85% for high confidence +- Located in multiple places in main loop +- Lower = more false positives, Higher = more retries + +**Delta threshold:** +- Currently 3 points for stability check +- Change in large delta handler +- Lower = stricter (more held readings), Higher = more lenient + +--- + +## Known Limitations + +1. **Camera-dependent:** Requires good camera positioning and lighting +2. **Display-specific:** Templates are specific to Masimo Rad-G display fonts +3. **No real-time HTML:** Review HTML only generated on shutdown +4. **Three-digit limitation:** Assumes HR displays as 1XX when >99, may need adjustment for other scenarios +5. **Brightness sensitivity:** Very bright ambient light can still cause issues + +--- + +## Future Enhancements (Planned) + +### Docker Deployment +**Target:** Ubuntu 22.04 server with Docker Compose + +**Requirements:** +- Dockerfile with Go + OpenCV +- Mount volumes: config.yaml, training_digits/, review/, logs/ +- Network access to RTSP stream and Home Assistant +- docker-compose.yml integration with existing HA setup + +**Considerations:** +- Image size: ~500MB-1GB with OpenCV +- Multi-stage build possible for smaller runtime image +- Review file access via scp or lightweight HTTP server + +### SQLite + Live HTML +**Goal:** Real-time progress viewing without stopping app + +**Approach:** +- Store frame data in SQLite as processed +- HTTP endpoint generates HTML on-demand from DB +- Access at `http://server:8080/review.html` anytime + +**Benefits:** +- No shutdown required to review progress +- Historical data queryable +- API potential for other tools + +### Adaptive Thresholds +**Idea:** Auto-adjust confidence and delta 
thresholds based on recent history + +**Example:** +- If 90% of last 100 frames >95% confidence → raise threshold to 90% +- If frequent false positives → increase delta threshold temporarily + +--- + +## Troubleshooting Guide + +### "No templates found for digit X" +**Problem:** Missing training data for a digit. + +**Solution:** Run system, review output, approve good images for that digit. + +### Frequent low confidence warnings +**Causes:** +- Camera moved/vibrated +- Lighting changed +- Focus drifted +- Display brightness changed + +**Solutions:** +- Check camera mounting +- Adjust lighting (block reflections) +- Re-focus camera if needed +- Restart to re-detect layout + +### False corruption detection +**Problem:** Valid digits matched as invalid. + +**Solution:** Review invalid templates, remove overly broad patterns. + +### Large delta causing unnecessary holds +**Problem:** Real physiological changes >3 points being held. + +**Solution:** +- Increase delta threshold (e.g., to 5) +- Or adjust hindsight validation logic +- Consider different thresholds for SpO2 vs HR + +### Values not posting to Home Assistant +**Checks:** +1. Home Assistant URL correct in config.yaml? +2. Network connectivity? +3. Sensors created in HA configuration? +4. Check log file for POST errors +5. Are values actually changing? 
(change-based posting only) + +--- + +## Performance Characteristics + +**Typical timing per frame (v3.48):** +- Frame acquisition: 150-250ms (camera waiting time) +- Preprocessing: 8-12ms +- Frame scaling: 3-5ms +- SpO2 recognition: 9-12ms +- HR recognition: 9-12ms +- Validation: 0-1ms +- File I/O: 0ms (normal), 40-50ms (failures only) +- Home Assistant POST: 5ms (when values change) +- **Total: 25-35ms per frame** (processing only, excluding camera wait) +- Timestamp check (every 10th frame): ~22ms (optimized with reused OCR client) + +**Debug mode (with /debug flag):** +- Additional file I/O: +40ms (saves all digit images) +- Total: 65-75ms per frame + +**With every 4th frame processing at 15fps:** +- ~3.75 readings per second maximum +- Actual posting rate: varies (only when values change) +- CPU usage: Low (single-threaded, mostly idle) +- Timestamp validation: Every ~2.5 seconds (~22ms each) + +--- + +## Version History + +### v3.57 (Current - True Raw Frame Saving) +- **CRITICAL FIX: Raw frames are now truly raw (before preprocessing)** + - **Problem in v3.56**: "Raw" frames were saved AFTER preprocessing (cropped, rotated, thresholded) + - **User confusion**: Testing with "raw" frames didn't work as expected - frames were already processed + - **Fix**: Clone and threshold raw frame IMMEDIATELY after acquisition, before ANY preprocessing + - **New flow**: + 1. Acquire colored frame from camera (portrait, with timestamp) + 2. **Clone frame → grayscale → threshold at 240 → save as rawThresholded** + 3. Continue with original colored frame → preprocess (crop + rotate) → threshold → OCR + 4. 
On error: Save rawThresholded to raw_frames/ + - **Result**: Saved "raw" frames are truly raw (portrait orientation, timestamp visible, not cropped/rotated) + - **File size**: ~100KB PNG (thresholded) vs 2MB colored JPG + - **Benefits**: + - Can test raw frames through full preprocessing pipeline + - See exactly what came from camera before any processing + - Small file size (thresholded) vs huge colored images + - **Impact**: When testing with raw_frames/raw_*.png, they'll go through crop+rotate+threshold like normal frames + +### v3.56 (Threshold AFTER Preprocessing) +- **CRITICAL FIX: Moved threshold to AFTER preprocessing**: Timestamp overlay was being thresholded! + - **Problem in v3.54-v3.55**: Threshold happened BEFORE preprocessing + - Colored frame → threshold → binary + - Timestamp extracted from binary frame (appeared as B&W) + - Preprocessed binary frame (cropped B&W timestamp) + - **Correct flow now**: + 1. Acquire colored frame + 2. Extract timestamp from **colored** frame (stays colored!) + 3. Preprocess colored frame (crop timestamp + rotate) + 4. **Threshold preprocessed frame** at 240 → binary + 5. Layout detection on binary + 6. 
OCR on binary + - **Why this is correct**: + - Timestamp area is removed BEFORE thresholding (stays colored) + - Only the actual display area (after crop/rotate) gets thresholded + - Single threshold at 240, happens once, at the right time + - **Impact**: Saved raw frames now show colored timestamp overlay (as expected) + +### v3.55 (Fixed Double Threshold Bug) +- **CRITICAL FIX: Removed duplicate threshold in saveThresholdedFrame()**: Frame was being thresholded TWICE + - **Problem**: saveThresholdedFrame() was doing grayscale conversion + threshold at 128 + - **But**: rawFrame parameter is ALREADY thresholded binary (from line 313 in pulseox-monitor.go) + - **Result**: Frames were being thresholded twice - once at 240, then again at 128 + - **Fix**: saveThresholdedFrame() now just saves the frame directly (no conversion, no threshold) + - **Impact**: Overlay/timestamp area should look consistent now + - This was the "extra" threshold causing B&W overlay to look different +- **Confirmed ONLY one threshold operation now**: + - pulseox-monitor.go line 292: Threshold at 240 (the ONLY threshold) + - layout_detection.go: No threshold (works on binary) + - ocr.go: No threshold in recognizeDisplayArea() (works on binary) + - processor.go: saveThresholdedFrame() just saves (no threshold) +- Note: ocr.go still has OLD deprecated recognizeDisplay() function with thresholds, but it's never called + +### v3.54 (Single Threshold Architecture - Had Double Threshold Bug) +- Attempted to threshold once at 240 after frame acquisition +- Removed internal thresholds from layout_detection.go and ocr.go +- BUT missed duplicate threshold in saveThresholdedFrame() → fixed in v3.55 + +### v3.53 (CPU Optimization - Thread Limiting) +- Removed duplicate ProcessedCount increment bug +- Limited OpenCV threads to 1 for minimal overhead + +### v3.52 (CPU Optimization Complete) +- **Cleaned up failed GPU acceleration attempts**: Removed AVFoundation backend code + - **Investigation findings**: 
GPU hardware acceleration for RTSP streams is not feasible with current OpenCV setup + - AVFoundation doesn't support network streams (RTSP), only local camera devices + - OpenCV's ffmpeg backend has VideoToolbox compiled in but doesn't auto-activate for RTSP + - Would require either: (1) patching OpenCV's videoio plugin in C++, (2) complete rewrite with different library, or (3) local transcoding proxy + - **Final optimization achieved**: CPU reduced from 50% to ~30% (40% reduction) + - Single-threaded OpenCV (gocv.SetNumThreads(1)) eliminated thread overhead + - Homebrew's OpenCV has AVFoundation/VideoToolbox support but only for local devices + - ~15% CPU for RTSP H.264 decoding (software) + ~15% CPU for OCR processing + - **Conclusion**: 30% CPU is acceptable for 24/7 monitoring, fan noise significantly reduced + +### v3.51 (Attempted GPU Acceleration - Reverted) +- **Attempted AVFoundation backend**: Failed for RTSP streams (network streams not supported) + +### v3.50 (Reduced CPU Overhead) +- **Added OpenCV thread limiting**: Dramatically reduced CPU usage and thread count + - **Problem**: OpenCV was creating 40 threads (one per CPU core) for small operations + - **Impact**: Excessive context switching, 65%+ CPU usage for simple 280x200px image operations + - **Fix**: Limited OpenCV to 1 thread with `gocv.SetNumThreads(1)` at startup + - **Rationale**: + - Our images are tiny (280x200px per display) + - We process sequentially, not in parallel + - Processing time <20ms - faster than thread spawning overhead + - Single-threaded = zero thread management overhead + - **Expected result**: + - Thread count drops from ~40 to ~5-8 (only Go runtime threads) + - CPU usage drops from 65% to <20% + - No performance loss (processing still <20ms per frame) + - **Startup message**: "šŸ”§ OpenCV threads limited to 1 (single-threaded for minimal overhead)" + - **Testing**: v3.50 with 2 threads showed 23 threads/48% CPU, so moved to 1 thread for further optimization + +### 
v3.49 (Fixed Frame Counter Bug) +- **CRITICAL FIX: Removed duplicate ProcessedCount increment**: Fixed skipped frame numbers + - **Problem**: `state.ProcessedCount++` was being called twice: + 1. In main loop (pulseox-monitor.go) - for every frame acquired + 2. In processor.go - when values changed and HASS posted + - **Result**: Every time values changed, the counter was incremented twice + - **Symptom**: Frame numbers would skip (e.g., #61, #62, skip #63, #64...) + - **Pattern**: Always skipped the frame number 2 positions after a HASS post + - **Fix**: Removed the duplicate increment in processor.go + - **Now**: Frame numbers increment consistently: #61, #62, #63, #64... + +### v3.48 (Major Performance Improvements) +- **CRITICAL: Moved digit image saves to DEBUG_MODE only**: Massive performance improvement + - **Problem**: OCR was saving 8 PNG files per frame (4 per display Ɨ 2 displays) + - checks.png visualization + - digit1.png, digit2.png, digit3.png (individual digits) + - full.png (labeled composite) + - **Impact**: ~5ms per PNG write Ɨ 8 files = 40ms of unnecessary file I/O per frame + - **Fix**: Wrapped all digit image saves in `if DEBUG_MODE` checks + - **Result**: OCR now takes ~9-12ms total (just template matching, no file I/O) + - **When needed**: Run with `/debug` flag to generate all digit images +- **Only save boxes.jpg on failures**: Reduced file I/O for successful frames + - Previously: Saved layout visualization (boxes.jpg) every time values changed (~51ms) + - Now: Only saves boxes.jpg when OCR fails (corruption, unrecognized, low confidence) + - Successful frames: No extra file I/O beyond what's needed +- **Performance comparison**: + - Before: Total ~80ms (Prep 10ms + Scale 4ms + OCR 20ms + FileIO 40ms + Valid 1ms + HASS 5ms) + - After: Total ~30ms (Prep 10ms + Scale 4ms + OCR 10ms + FileIO 0ms + Valid 1ms + HASS 5ms) + - **~60% faster processing** for normal operation +- **Debug workflow unchanged**: `/debug` flag still generates all 
images as before + +### v3.47 (Timing Instrumentation) +- **Added comprehensive timing measurements with /timing flag**: Enables detailed performance analysis + - **New /timing command line flag**: Activates timing mode (./pulseox-monitor /timing) + - **Horizontal timing table**: Shows timing breakdown for each frame + - Frame | Acquire | Prep | Scale | OCR_SpO2 | OCR_HR | Valid | FileIO | HASS | Total + - **Header printed every 20 frames**: Prevents scroll-back issues + - **Timing measurements**: + - **Acquire**: Frame acquisition from RTSP source - WAITING TIME (not processing) + - **Prep**: Preprocessing (crop timestamp + rotate 90°) + - **Scale**: Frame scaling to normalized width (if needed) + - **OCR_SpO2**: SpO2 display recognition (template matching) + - **OCR_HR**: HR display recognition (template matching) + - **Valid**: Validation checks (corruption, confidence, stability) - accumulated across all checks + - **FileIO**: Saving review images and debug files - accumulated across all file operations + - **HASS**: Home Assistant POST request (only when values change) + - **Total**: Wall-clock processing time (Prep + Scale + OCR + Valid + FileIO + logging overhead) + - **Note**: Acquire is separate (camera waiting time). Total may not equal sum due to logging/overhead. 
+ - **Purpose**: Identify performance bottlenecks (e.g., excessive file I/O) + - **Implementation**: + - Added TimingData struct in types.go + - Added TIMING_MODE global flag + - Added printTimingTable() function in helpers.go + - Added timing measurements throughout processFrame() and processFrames() + - Timing data passed through entire processing pipeline + - **Example output**: + ``` + Frame | Acquire | Prep | Scale | OCR_SpO2 | OCR_HR | Valid | FileIO | HASS | Total + ------|---------|------|-------|----------|--------|-------|--------|------|------- + #123 | 2ms | 6ms | 0ms | 4ms | 4ms | 2ms | 38ms | 5ms | 61ms + #124 | 2ms | 6ms | 0ms | 4ms | 4ms | 1ms | 39ms | 5ms | 61ms + ``` + - **Benefits**: + - Quickly identify slow operations + - Track performance trends over time + - Validate optimization efforts + - Debug unexpected delays + +### v3.46 (Fixed "1" Detection + Raw Frame Improvements) +- **CRITICAL FIX: Lowered "1" digit detection threshold from 90% to 85%** + - **Problem**: "1" digits with 87-89% confidence were being rejected, causing misrecognition + - **Example**: "71" was being cut incorrectly because the "1" wasn't detected (87.1% < 90% threshold) + - **Result**: Wrong width used (100px instead of 72px), throwing off all cut positions + - **Fix**: Threshold lowered to 85% in `hasOneAt()` function (ocr.go:196) + - **Impact**: More reliable detection of "1" digits, especially with slight variations in brightness/focus +- **Auto-enable DEBUG_MODE for file processing** + - When testing with a file (e.g., `./pulseox-monitor raw_frames/thresh_*.png`), DEBUG_MODE automatically enabled + - Eliminates need to add `/debug` flag manually for single frame testing + - Implementation: pulseox-monitor.go:65-66 in `runSingleFrameMode()` +- **Added comprehensive debug output for digit detection** + - Shows display width at start of detection + - For each digit (3→2→1): + - Check position being tested + - hasOneAt() extraction region and confidence score + - 
Whether detected as "1" or not + - Width being used (72px vs 100px) + - Running total width + - Final CUT position calculated + - Region extraction coordinates and widths for all three digits + - Makes debugging cut position issues trivial - can see exact logic flow +- **Fixed raw frame saving to be truly raw** + - **Previous**: Frames were saved AFTER preprocessing (rotated, cropped) + - **Now**: Frames saved BEFORE any processing (portrait, with timestamp, untouched) + - **Processing applied**: Only thresholded (grayscale → binary) to save space as PNG + - **Benefit**: Can test raw frames with full preprocessing pipeline + - **Implementation**: Clone frame immediately after reading, pass to processFrame() +- **Fixed duplicate logging in test mode** + - **Problem**: Messages with `Both` target were printed twice in test mode + - **Cause**: globalLogger was set to os.Stdout (same as console output) + - **Fix**: Set globalLogger = nil in test mode (pulseox-monitor.go:71) + - **Result**: Clean, non-duplicated output when testing with files + +### v3.45 (Added Processing Time Measurements) +- **Added comprehensive timing measurements**: Now shows how long each frame takes to process + - **Frame processing time**: Displayed in console output: `SpO2=97%, HR=76 bpm [45ms]` + - **Frame acquisition interval**: Logged to file showing time since last frame acquired + - **Purpose**: Understand actual performance vs camera lag vs processing bottlenecks + - **What's measured**: + - Frame acquisition interval (target: 250ms for 4 fps) + - Total processing time per frame (OCR + validation + file I/O) + - Logged for both changed values (console) and unchanged values (file only) + - **Output examples**: + - Console: `SpO2=97%, HR=76 bpm [45ms]` (when value changes) + - Log file: `Frame #123: SpO2=97%, HR=76 bpm (no change) - processed in 42ms` + - Log file: `[TIMING] Frame acquired: +251ms since last (target: 250ms)` + - **Benefits**: + - Identify if processing time is exceeding 
250ms target + - See actual frame rate vs target 4 fps + - Distinguish between camera timestamp lag and processing delays + - Debug performance issues with concrete data + - This addresses confusion about camera timestamp lag vs actual processing performance + +### v3.44 (Save Thresholded PNGs, Not Colored JPGs) +- **Changed error frame format**: Now saves thresholded black/white PNG instead of colored JPG + - **Rationale**: Thresholded frames show exactly what the OCR sees, making debugging much more effective + - **Previous**: Saved raw colored camera frames that required manual processing to debug + - **Now**: Saves thresholded (binary) PNG showing the exact image fed to template matching + - **Processing**: Grayscale conversion → threshold at 128 → save as PNG + - **Filename format**: `raw_frames/thresh_YYYYMMDD-NNNNN.png` + - **Saved on**: + - Corruption detection (invalid template match) + - Unrecognized digits (negative values) + - Low confidence (first attempt) + - Low confidence after retry + - **Benefits**: + - Can replay frames directly through OCR pipeline + - See exactly what template matching is working with + - Identify missing templates or threshold issues immediately + - No need to manually preprocess frames for debugging + - PNG format (lossless) vs JPG (lossy with compression artifacts) + - **Storage**: ~50-100KB per frame vs 200-400KB for colored JPG + +### v3.43 (Fixed Timing + Removed Raw Frames) +- **Fixed frame acquisition timing**: 250ms timer now starts from frame ACQUISITION, not processing completion + - **Previous behavior**: Waited 250ms after processing finished (variable frame rate) + - **New behavior**: Waits 250ms from when frame was acquired (consistent 4 fps) + - **Implementation**: Set `lastProcessedTime = now` BEFORE returning frame, not after processing + - **Example**: If processing takes 50ms, next frame acquired at exactly 250ms from previous acquisition + - **Result**: Guaranteed exactly 4 fps regardless of processing time 
variations +- **Removed raw frame saving**: System no longer saves unprocessed camera frames + - **Rationale**: Thresholded digit images (step4) are already saved for all failures + - **What's saved on errors**: + - review/f{N}_spo2_digit1.png, digit2, digit3 (thresholded crops) + - review/f{N}_hr_digit1.png, digit2, digit3 (thresholded crops) + - review/f{N}_spo2_full.png, hr_full.png (labeled displays) + - review/f{N}_spo2_checks.png, hr_checks.png (visualization) + - **Removed**: + - raw_frames/ directory no longer created + - rawFrame parameter removed from processFrame() + - rawFrame cloning removed from processFrames() loop + - **Benefits**: Reduced memory usage, simpler code, less disk I/O + - Thresholded images are more useful for debugging than raw camera frames + +### v3.42 (Time-Based Frame Processing) +- **Changed from skip-based to time-based frame processing**: More reliable and precise timing + - **Previous approach**: Process every 4th frame (variable timing depending on stream fps) + - **New approach**: Process frames at exactly 4 fps (250ms minimum interval) + - **Implementation**: + - Track `lastProcessedTime` timestamp + - Only process frame if 250ms has elapsed since last processed frame + - Guarantees consistent 4 fps processing rate regardless of stream fps + - **Benefits**: + - Predictable, consistent processing rate + - Works correctly even if stream fps varies + - Simpler to understand and configure + - **Updated NewRTSPSource()**: Removed `skipCount` parameter, now uses hardcoded 250ms interval + - Console message: "šŸ“Š Processing frames at 4 fps (250ms minimum interval)" +- **Confirmed**: All step debug images only saved in DEBUG_MODE + - step1_original.png (only with /debug flag) + - step2_decolorized.png (only with /debug flag) + - step3_grayscale.png (only with /debug flag) + - step4_thresholded.png (only with /debug flag) + +### v3.41 (Critical Performance Fix) +- **CRITICAL: Fixed excessive debug image writes causing high resource 
usage** + - **Problem**: Saving 8 debug images PER FRAME (4 per display x 2 displays) + - **Impact**: At 3.75 fps = ~30 images/second = massive disk I/O and CPU usage + - **Images being saved on every frame**: + - step1_original.png (colored extraction) + - step2_decolorized.png (after alarm mode decolorization) + - step3_grayscale.png (after grayscale conversion) + - step4_thresholded.png (after binary threshold) + - **Fix**: Only save these debug images when DEBUG_MODE is enabled + - **Result**: Normal operation now only saves essential files (digit crops, checks, full) + - **To enable debug images**: Run with `./pulseox-monitor /debug` flag + - **Performance improvement**: ~70% reduction in disk I/O, significant CPU savings + - This was a leftover from debugging alarm mode decolorization that should never have been in production + +### v3.40 (Actionable Layout Detection Errors) +- **Enhanced layout detection error diagnostics**: Now saves debug visualization when "Only 1 digit display found" error occurs + - **Problem**: Error message was not actionable - couldn't see what boxes were detected or why they failed + - **Fix**: Save timestamped debug image showing all center boxes with labels + - **Debug visualization shows**: + - All boxes crossing 50% line (center region) + - Green boxes = qualified as digit displays (height >= 110px) + - Cyan boxes = rejected (too short) + - Box labels showing dimensions: "#0: H=XXpx OK" or "#1: H=XXpx TOO SHORT" + - Red line showing 50% Y position + - Summary at top: "Found: X center boxes, Y digit displays (need 2)" + - **Enhanced console error output**: + - Shows count of center boxes found + - Lists each box with dimensions and qualification status + - "āœ“ QUALIFIED" for boxes that passed height requirement + - "āœ— TOO SHORT (< 110px)" for boxes that were rejected + - Path to debug image: test_output/layout_error_YYYYMMDD_HHMMSS.jpg + - Makes debugging layout failures much faster - can immediately see what system detected + 
+### v3.39 (Fixed Frame Scaling Order) +- **CRITICAL FIX: Frame scaling now happens in correct order** + - **Problem**: Boxes were calculated on original frame, then frame was scaled, causing coordinate mismatch + - **Root cause**: Layout detection calculated boxes on unscaled frame, but processFrame() scaled the frame later + - **Fix**: Layout detection now scales frame IMMEDIATELY after calculating scale factor (Step 4) + - **Process flow**: + 1. detectScreenLayoutAreas() calculates scale from bounding box width + 2. Scales frame to 860px width using gocv.Resize() + 3. **Scales ALL coordinate arrays** (bounding box, allBoxes, significantBoxes) + 4. ALL subsequent processing (contour detection, box calculation) uses scaled frame AND scaled coordinates + 5. Returns boxes in scaled coordinates + scale factor + 6. processFrame() scales every frame using same scale factor + 7. Boxes and frames now match perfectly + - **Code changes**: + - layout_detection.go: Added frame scaling at Step 4 (after scale calculation) + - layout_detection.go: Scale all coordinate arrays (scaledBoundingBox, scaledAllBoxes, scaledSignificantBoxes) + - All debug visualizations now use scaled frame (step 5-15 images) + - Removed tryAcquireLayout() wrapper function (unnecessary abstraction) + - pulseox-monitor.go: Calls detectScreenLayoutAreas() directly, stores layout+scale in state + - processor.go: Keeps scaling logic in processFrame() (needed for every frame) + - **Benefits**: + - Boxes calculated on 860px frame match frames that are scaled to 860px + - No more coordinate mismatches between layout detection and OCR processing + - Simpler code flow - no useless wrapper functions + - Alarm box trimming (Steps 8-10) preserved and working correctly + - **Verified**: Bounding box width log will show ~860px after scaling (was variable before) +- **BUG FIX: Review entries now use correct frame numbers** + - **Problem**: Successful readings used `state.FrameCount` (always 0) instead of actual 
`frameNum` + - **Result**: All images referenced f0_* instead of actual frame numbers + - **Fix**: Changed 3 instances in processor.go to use `frameNum` parameter + - Affected: Invalid physiological values, successful readings, unstable readings + - Failed readings (corruption, unrecognized, low confidence) were already correct +- **OPTIMIZATION: Only save images for frames added to review** + - **Problem**: Every processed frame saved images, even if values didn't change + - **Result**: review/ folder filled with unused images from StatusNoChange frames + - **Fix**: Delete digit images when values haven't changed (StatusNoChange) + - Saves disk space and makes review folder only contain relevant frames + - Deleted files: f*_spo2_checks.png, f*_spo2_digit[1-3].png, f*_spo2_full.png, f*_hr_*.png + +### v3.38 (Dynamic Frame Normalization) +- **Implemented dynamic frame scaling based on bounding box width**: Handles unstable device position + - **Problem**: Pulse oximeter lays loose on bed, causing frame width to vary + - **Solution**: Measure bounding box width in Step 4 of layout detection + - **Process**: + 1. Calculate scale factor: `scale = 860 / boundingBoxWidth` + 2. Log bounding box width and scale factor to console + 3. Store scale factor in ProcessingState.LockedScale + 4. 
Apply scaling to every frame before OCR processing + - **Changes**: + - `detectScreenLayoutAreas()` now returns `(*ScreenLayout, float64, error)` - includes scale factor + - `tryAcquireLayout()` captures and stores scale in state + - `processFrame()` applies scaling if `state.LockedScale != 1.0` + - Logs: "šŸ“Š Bounding box width: Xpx, Scale factor: Y (target: 860px)" + - **Benefits**: + - Consistent 860px normalized width regardless of camera position/zoom + - Layout coordinates remain stable across frames + - OCR accuracy maintained even when device moves + +### v3.37 (Fixed Layout Detection Height Threshold) +- **Fixed digit display height threshold**: Changed from > 120px to >= 110px + - **Problem**: Layout detection was failing because digit displays were exactly 110-120px tall + - **Previous code**: Required height > 120px (too restrictive) + - **Fixed code**: Requires height >= 110px (matches MIN_BOX_HEIGHT constant) + - **Added debug logging**: Shows dimensions of each center box and whether it's accepted/rejected + - Helps diagnose layout detection failures + - Consistent with original design intent (MIN_BOX_HEIGHT = 110) + +### v3.36 (Unified Logging) +- **Consolidated all logging to use logMessage() function**: Replaced all fmt.Printf(), fmt.Println(), and inconsistent logging patterns + - **ocr.go**: Converted template loading messages to use logMessage() + - Template warnings → Console + Warning level + - Template loading success → Console + Info level + - **frame_source.go**: Converted frame processing info message + - "Processing every Nth frame" → Console + Info level + - **normalize.go**: Converted all debug output messages + - Decolorization progress → LogFile + Debug level + - Debug file saves → LogFile + Debug/Info level + - **All files now use consistent logging pattern**: + - Startup/shutdown → Console + Info + - Main readings → Both + Info + - Warnings/errors → Both + Warning/Error + - Debug messages → LogFile + Debug (respects DEBUG_MODE 
flag) + - Progress indicators → Console + Info + - **Benefits**: + - Single source of truth for logging behavior + - Consistent timestamp formatting across all logs + - Proper level-based filtering + - Clean separation of console vs file logging + - Debug mode properly controls debug output + +### v3.35 (Logging System Implementation) +- **Implemented logMessage() function in helpers.go**: New unified logging system + - Target: Console, LogFile, Both + - Level: Debug, Info, Warning, Error + - Single-letter prefixes (D, I, W, E) for clean output + - Automatic DEBUG_MODE filtering for debug messages + - Timestamp formatting: [HH:MM:SS.mmm] + - Eliminates need for checking if logger != nil throughout codebase + +### v3.34 (Correct Order + Debug Images) +- **Fixed order of layout detection**: Center region FIRST, digit displays SECOND + - **Correct process order**: + 1. Find ALL contours with RetrievalList + 2. Collect all significant boxes (>50px width or height) + 3. Calculate bounding box from ALL boxes (not just digit displays) + 4. Find 50% Y line from overall bounding box + 5. Filter to boxes crossing 50% line (center region) + 6. From center boxes, identify digit displays (height >= 110px) + 7. Find alarm icons (center boxes extending beyond digit displays) + 8. Trim digit displays based on alarm icons + 9. Split by center X + 10. Find rightmost X in each half (using box center) + 11. 
Create fixed-width boxes (280px) + - **Debug images at every step** saved to test_output/: + - layout_step1_grayscale.jpg + - layout_step2_threshold.jpg + - layout_step3_eroded.jpg + - layout_step4_all_boxes.jpg (all boxes >50px in green) + - layout_step5_center_boxes.jpg (boxes crossing 50% line in cyan, red line) + - layout_step6_digit_displays.jpg (digit displays in magenta) + - layout_step7_alarm_icons.jpg (alarm icons in yellow, if any found) + - layout_step8_trimmed_displays.jpg (trimmed displays in green) + - layout_step9_final_boxes.jpg (final SpO2/HR boxes with labels) + - Key fix: Bounding box calculated from ALL contours, not just digit displays + - This prevents missing digit displays that are outside early bounding calculations + +### v3.33 (Contour Debug Logic + Center Region - WRONG ORDER) +- **Implemented contour_debug.go logic combined with center region approach**: As instructed + - **Exact implementation of contour_debug.go detection:** + 1. Find ALL contours with RetrievalList + 2. First pass: Identify digit displays (height >= 110px) + 3. Second pass: Find alarm icons (boxes extending beyond digit displays on right) + 4. Trim digit displays: Find leftmost alarm icon edge that overlaps, set as right boundary + - **Combined with center region logic:** + 5. Calculate bounding box from trimmed digit displays + 6. Find 50% Y line + 7. Filter to displays crossing 50% line (center region) + 8. Split by center X + 9. Find rightmost X in each half (using box center to determine which half) + 10. 
Create fixed-width boxes (CUT_WIDTH = 280px) + - **Key differences from v3.32:** + - Works with TRIMMED digit displays (after alarm icon trimming) + - Alarm icons properly detected and used to trim display boundaries + - Follows exact contour_debug.go algorithm before applying center logic + - Diagnostic output shows trimming actions when alarm icons detected + +### v3.32 (Fixed Layout Detection - WRONG APPROACH) +- **Reverted to proven layout detection logic**: Fixed incorrect box detection from v3.30 + - **Problem in v3.30/v3.31**: Alarm icon detection was finding wrong boxes (yellow alarm square instead of digit displays) + - **Fix**: Restored working center region approach without alarm icon complications + - **Process**: + 1. Find all significant contours (>50px width or height) + 2. Calculate overall bounding box + 3. Filter to boxes crossing 50% Y line with height >= 110px (digit displays) + 4. Create center region from filtered boxes + 5. Split by center X + 6. Find rightmost X in each half using **box center** to determine which half + 7. Create fixed-width boxes (CUT_WIDTH = 280px) + - Key insight: Use box center (not box edge) to determine left vs right half + - Alarm icon trimming removed for now - adds complexity without clear benefit + - Focuses on reliable detection of actual digit display boxes + - Debug images from v3.31 retained for troubleshooting + +### v3.31 (Decolorization After Extraction) +- **Moved decolorization to after display area extraction**: Layout detection now works on original colored frames + - **Problem**: Layout detection was trying to threshold colored alarm backgrounds, failing to find displays + - **Fix**: Layout detection now works on original colored frame (grayscale + threshold) + - **Decolorization moved**: Now happens in `recognizeDisplayArea()` AFTER extracting the small display regions + - **Process flow**: + 1. Preprocess (crop + rotate) - original colored frame + 2. 
Detect layout on colored frame (grayscale → threshold → find contours) + 3. Extract display areas (280x200px) - still colored + 4. **Decolorize extracted regions** (only ~56K pixels per display) + 5. Continue with OCR (grayscale → threshold → template matching) + - **Debug images added**: Each processing step now saves debug images: + - `f{N}_{display}_step1_original.png` - Original extracted colored region + - `f{N}_{display}_step2_decolorized.png` - After decolorization + - `f{N}_{display}_step3_grayscale.png` - After grayscale conversion + - `f{N}_{display}_step4_thresholded.png` - After thresholding (ready for OCR) + - Allows visual verification of each processing step + - Decolorization only affects OCR, not layout detection + +### v3.30 (Simplified Layout Detection) +- **Simplified layout detection with alarm icon trimming**: Combined best of both approaches + - **Uses RetrievalList** to find ALL contours (including nested ones like alarm icons) + - **Single contour pass**: No more complicated center region extraction and re-detection + - **Alarm icon detection**: Finds boxes that extend beyond digit displays (clock icons, alarm indicators) + - **Smart trimming**: Trims BOTH SpO2 and HR displays if alarm icons found, otherwise uses regular boxes + - **Process**: + 1. Find all contours with RetrievalList + 2. Identify digit displays (height >= 110px) + 3. Find alarm icons (boxes extending beyond digit displays on the right) + 4. Trim digit displays based on leftmost alarm icon edge (if any) + 5. Split displays into left (SpO2) and right (HR) halves using center X + 6. Find rightmost X in each half for display boundaries + 7. 
Create fixed-width boxes (CUT_WIDTH = 280px) + - Much simpler and more reliable than previous double-contour approach + - Combines center region logic with direct contour approach from contour_debug.go + +### v3.29 (Correct Decolorization Scope) +- **Fixed decolorization scope**: Removed wasteful full-frame decolorization + - **Problem in v3.28**: Decolorizing entire 1390x2736 frame (3.8M pixels) took ~2000ms + - **Reality**: Decolorization only needed for small display regions during OCR + - **Correct flow**: + 1. Preprocess (crop + rotate) - original colored frame + 2. Detect layout on original frame (works fine with colored backgrounds) + 3. Extract display areas (280x200px each) + 4. Decolorize ONLY display regions in `recognizeDisplayArea()` (~100K pixels) + 5. Run OCR on decolorized regions + - Decolorization already exists in `ocr.go` at the right place (line ~281) + - No need for full-frame decolorization - layout detection works on colored frames + - **Performance**: ~40x faster (decolorizing 100K pixels vs 3.8M pixels) + +### v3.28 (Decolorization Timer - REVERTED) +- **Added decolorization timing**: Timer measures how long decolorizeFullFrame() takes + - Timer added after preprocessing in main processing loop + - Displays: "[TIMER] Decolorization took: Xms" + - **Restored decolorization to pipeline**: Was defined but not being called! 
+ - Decolorization now runs between preprocessing and layout detection + - Enables alarm mode support (colored backgrounds → black/white) + - Allows measurement and optimization of decolorization performance + - Frame lifecycle: raw → preprocess → decolorize → detect/process + +### v3.27 (Save ACTUAL Raw Frames) +- **CRITICAL FIX: Raw frames are now truly raw (unprocessed)** + - **Problem**: `raw_frames/raw_*.png` files were being saved AFTER preprocessing (rotation + crop) + - **Result**: When testing with "raw" frames, they were rotated AGAIN, making displays vertical instead of horizontal + - **Fix**: Clone raw frame immediately after reading from source (line 233) + - **Flow now**: + 1. Read frame from source (portrait orientation) + 2. Clone raw frame → keep for potential error saving + 3. Preprocess: crop timestamp + rotate 90° CW + 4. Decolorize + 5. Process + 6. IF error (corruption, low confidence, unrecognized) → save RAW clone + 7. Close raw frame clone + - Raw frames saved in `processor.go` at error points using the `rawFrame` parameter + - Raw frames are now truly unprocessed - can be used for testing by running through full pipeline + - Fixes issue where testing with raw frames would double-rotate them + +### v3.26 (Fixed Redundant Decolorization) +- **CRITICAL FIX: Eliminated 4x redundant decolorization calls**: Now decolorizes ONCE at top level only + - **Problem**: Frame was being decolorized 4 times for every layout detection: + 1. In `detectScreenWidth()` (2668x1458 - full frame) + 2. In `saveNormalizationDebug()` (2668x1458 - same frame again!) + 3. In `detectScreenLayoutAreas()` (2130x1164 - center region) + 4. In `detectScreenLayoutAreas()` center region (859x518 - smaller region) + - **Each call took 2-3 seconds** - total 8-12 seconds wasted per frame! 
+ - **Fix**: Removed ALL decolorization calls from normalize.go and layout_detection.go + - **Now**: Single decolorization in `pulseox-monitor.go` at line 259-267 + - **Result**: Frame processing time reduced from ~12 seconds to ~3 seconds + - Decolorized frame passed to all layout functions - they expect pre-decolorized input + - Simplified `decolorizeFullFrame()` output - removed progress indicators (10%, 20%, etc.) + - Added comments documenting that functions expect already-decolorized frames + +### v3.25 (Aggressive Debug Output) +- **Added extensive debug image saves**: Every processing step now saves an image to test_output/ + - step1_original.jpg - Frame after preprocessing (rotated) + - step2_decolorized.jpg - After decolorizeFullFrame() (should be black/white) + - step3_grayscale.jpg - After grayscale conversion + - step4_thresholded.jpg - After binary threshold +- **Added decolorization progress tracking**: + - Shows progress every 10% of rows processed + - Shows final statistics: white pixel count/percentage, black pixel count/percentage + - Helps diagnose if decolorization is working correctly +- **Purpose**: Debug why alarm mode decolorization appears to not be working + +### v3.24 (Fixed Layout Decolorization) +- **Fixed missing decolorization in layout detection**: Layout detection was still using colored frames + - Added `decolorizeFullFrame()` call in `detectScreenLayoutAreas()` function + - Previously: Only `detectScreenWidth()` was decolorizing, but `detectScreenLayoutAreas()` was not + - Problem: Layout detection was converting colored frame directly to grayscale + - Result: Alarm mode colored backgrounds prevented contour detection + - Fix: Decolorize BEFORE grayscale conversion in both contour detection passes: + 1. Initial contour detection (full frame) + 2. 
Center region contour detection + - Now: Layout detection works correctly in both normal and alarm modes + - This was the missing piece preventing alarm mode from working end-to-end + +### v3.23 (Higher White Threshold) +- **Increased white pixel detection threshold**: Changed from 200 to 240 for more selective white detection + - Updated in both `decolorizeFullFrame()` (normalize.go) and `decolorizeAlarmMode()` (ocr.go) + - Previous threshold (200): Too permissive, caught near-white/gray pixels + - New threshold (240): More selective, only truly white pixels (near 255) are preserved + - Logic: If RGB all > 240 → keep as white (255), else → convert to black (0) + - Rationale: Display digits are pure white (255), not gray, so higher threshold is more accurate + - Better separation between white digits/borders and colored alarm backgrounds + +### v3.22 (Decolorize Before Layout) +- **Moved decolorization before layout detection**: Alarm mode now works end-to-end + - New `decolorizeFullFrame()` function in normalize.go + - Applied BEFORE layout detection (in `detectScreenWidth()`) + - Previously: Decolorization only happened during OCR (too late) + - Now: Decolorization happens first, then layout detection sees clean white-on-black displays + - Fixes issue where colored alarm backgrounds prevented layout detection + - Layout detection flow: + 1. Decolorize full frame (yellow/cyan → black, white → white) + 2. Convert to grayscale + 3. Threshold at 170 + 4. 
Find contours + - Same decolorization also applied in `saveNormalizationDebug()` for accurate debug visualization + - Result: System can now detect and recognize displays in both normal and alarm modes + +### v3.21 (Preprocessing Debug) +- **Added debug output for preprocessing**: Prints frame dimensions before and after preprocessing + - Shows: "Before preprocess: WxH" and "After preprocess: WxH" + - Helps verify that rotation is actually happening + - Expected: Portrait (e.g., 1080x1920) → Landscape (e.g., 1920x1080) + - Temporary diagnostic output to verify preprocessFrame() is working + +### v3.20 (Unified Preprocessing) +- **Verified unified preprocessing path**: Both RTSP streaming and file test modes use identical preprocessing + - All frames go through `preprocessFrame()` function: + 1. Crop timestamp (top 68 pixels) + 2. Rotate 90° clockwise (portrait → landscape) + - Applied in unified `processFrames()` loop (line 250) + - Works for both `RTSPSource` and `FileSource` + - **Important**: Raw frames saved to `raw_frames/` are saved AFTER preprocessing + - Therefore: Raw frames are already rotated and ready for testing + - If testing with unrotated frames, they'll go through preprocessing automatically + - Ensures consistent behavior regardless of frame source + - Layout detection expects landscape orientation (SpO2 and HR side-by-side horizontally) + +### v3.19 (Alarm Mode Support) +- **Added alarm mode decolorization**: System now recognizes digits even when pulse oximeter is in alarm state + - New `decolorizeAlarmMode()` function converts colored alarm backgrounds to black + - Alarm mode displays: + - SpO2: Yellow/orange background with white digits + - HR: Cyan/blue background with white digits + - Decolorization logic: + - Pixels where ALL RGB values > 200 → kept as white (digits) + - All other pixels → converted to black (colored backgrounds, borders) + - Applied before grayscale conversion in OCR pipeline + - Enables normal thresholding and template 
matching to work correctly + - Previously: Colored backgrounds would confuse thresholding, causing OCR failures + - Now: Alarm mode digits recognized just as reliably as normal mode + - Example: "90" in yellow box → converted to "90" in black/white → OCR succeeds + +### v3.18 (Zero Validation) +- **Added '0' digit validation**: Prevents false '0' recognition by checking for characteristic center hole + - New `validateZero()` function checks for a hole at 50% Y (center) + - Scans horizontally from 30%-70% X looking for black pixels (hole) + - Wider horizontal range than '8' validation (30-70% vs 40-50%) since '0' hole is larger + - If validation fails, marks digit as invalid (-1) triggering retry/re-detection + - Applied to all three digit positions (digit1, digit2, digit3) + - Complements existing '8' validation (two holes at 30% and 70% Y) + - Helps prevent misrecognition of corrupted/partial digits or digits with missing centers as '0' + - Example log output: + ``` + validateZero: center hole @ y=50 (50%) x=15-35 (true) + ``` + +### v3.17 (Enhanced Cleanup) +- **Enhanced directory cleanup on startup**: Now cleans all output directories in streaming mode + - Previously: Only `review/` was cleaned on startup + - Now: Cleans `review/`, `raw_frames/`, and `test_output/` in streaming mode + - Single-frame test mode: No cleanup (preserves test output) + - Progress display: Shows each directory being cleaned with checkmarks + - Example output: + ``` + šŸ—‘ļø Cleaning output directories... + - review/... āœ“ + - raw_frames/... āœ“ + - test_output/... 
āœ“ + ``` + - Rationale: `raw_frames/` accumulates failed recognition frames over time; clean slate each run + - Rationale: `test_output/` contains debug visualizations that should be fresh per session + +### v3.16 (Append-Only HTML Review) +- **Implemented append-only HTML generation**: Review HTML is now written incrementally instead of all at once on shutdown + - HTML header written once at startup via `initReviewHTML()` + - Each frame entry appended immediately via `appendReviewEntry()` as it's processed + - Footer written on shutdown via `closeReviewHTML()` + - Browser handles missing closing tags gracefully, allowing live refresh + - Benefits: + - āœ… Live updates: refresh browser anytime to see latest frames + - āœ… No regeneration overhead: just one file write per frame + - āœ… Can run for hours without memory/performance issues + - āœ… No data loss if process crashes (all processed frames already written) + - All review entry points now consistently call `appendReviewEntry()`: + - Corruption detection + - Unrecognized digits + - Low confidence (both first attempt and retry) + - Invalid physiological values + - Successful readings + - Unstable readings (held for validation) + - Fixed duplicate `appendReviewEntry()` calls in corruption handler (was calling 3 times) + +### v3.15 (Fixed Eight Validation) +- **Fixed validateEight() probe positions**: Corrected hole detection based on actual measurements + - Issue: Was probing at 33% and 67% which hit the white digit and middle bar + - Fix: Now checks horizontal line segments at 30% and 70% height + - Scans X from 40% to 50% of width (10% range centered around middle) + - Looks for ANY black pixel (< 128) in that range = hole detected + - Much more robust than single pixel check - tolerates slight variations + - Holes are BLACK (empty space), digit is WHITE in thresholded image + - Prevents false rejection of valid "8" digits + +### v3.14 (Corruption Debug Output) +- **Added debug output for corruption cycles**: 
Helps diagnose repeated corruption detection
+  - When corruption is detected, saves debug files to `test_output/` with timestamp
+  - Saves normalized frame: `corruption_YYYYMMDD_HHMMSS_frameN_normalized.png`
+  - Saves layout visualization: `corruption_YYYYMMDD_HHMMSS_frameN_layout.jpg`
+  - Also includes all digit crops from review/ directory (already generated by OCR)
+  - Allows offline analysis when system gets stuck in corruption → re-detect → corruption cycles
+  - Log message: "šŸ’¾ Debug saved: YYYYMMDD_HHMMSS"
+
+### v3.13 (RTSP Reconnect Fix + Eight Validation)
+- **Fixed segmentation fault on RTSP reconnect failure**: Prevents crash when stream reconnection fails
+  - Added nil check before reading from stream
+  - Sets stream to nil and marks source as closed after failed reconnect
+  - Returns shouldContinue=false to stop processing instead of attempting to read from nil stream
+  - Prevents SIGSEGV when VideoCapture is nil
+- **Added '8' digit validation**: Prevents false '8' recognition by checking for two characteristic holes
+  - New `validateEight()` function probes two specific regions (at 1/3 and 2/3 height)
+  - Checks if both regions are at least 60% black (indicating holes)
+  - If validation fails, marks digit as invalid (-1) triggering retry/re-detection
+  - Applied to all three digit positions (digit1, digit2, digit3)
+  - Helps prevent misrecognition of corrupted/partial digits as '8'
+
+### v3.12 (Physiological Value Validation)
+- **Added minimum value threshold for Home Assistant posting**: Prevents posting physiologically impossible values
+  - Validation rule: Both SpO2 and HR must be >= 40 to post to Home Assistant
+  - Values below 40 are logged to file and added to review, but NOT posted to HASS
+  - Console displays: "āš ļø  Values below 40 - not posting to HASS"
+  - Rationale: SpO2 or HR below 40 are not physiologically possible in living patients
+  - Prevents false alarms and invalid data in Home Assistant database
+  - Common scenarios 
triggering this: + - Pulse oximeter disconnected (shows dashes, may be misread as low numbers) + - Recognition errors resulting in single-digit values (0-9) + - Display corruption being partially recognized + +### v3.11 (Reduced Raw Frame Storage) +- **Raw frames now only saved on recognition failures**: Significantly reduces disk space usage + - Previously: Every processed frame saved to `raw_frames/` (all successes and failures) + - Now: Only saves frames when recognition fails (corruption, unrecognized, low confidence) + - Successful readings and unstable readings (held for validation) no longer saved + - Failure cases where frames are saved: + - āŒ Corrupted frames (invalid pattern matched) + - āŒ Unrecognized digits (negative values) + - āŒ Low confidence readings (both first attempt and retry) + - Typical result: ~90% reduction in raw frame storage for stable monitoring + - Failed frames still available for debugging and training data collection + +### v3.10 (Corruption Frame Number) +- **Added frame number to corruption log message**: Easier debugging of false corruption detections + - Message now shows: `[CORRUPTION] Frame #123 - Digit = -1 detected: SpO2(9,5) HR(7,-1) - skipping frame` + - Helps identify which specific frame triggered invalid pattern match + - Useful for reviewing frames in HTML output and approving/removing invalid templates + +### v3.9 (Simplified Digit Detection Arrays) +- **Simplified digit detection to use only arrays**: Removed all intermediate variables + - `D[1,2,3]` array stores cut positions for digit extraction + - `digitIsOne[1,2,3]` array stores detection results (is it a "1"?) + - Eliminated intermediate variables like digit1IsOne, digit2IsOne, digit3IsOne, middleStart, rightStart + - Code now directly uses array notation throughout: `digitIsOne[2]`, `D[3]`, etc. 
+ - Cleaner, more maintainable code with single source of truth +- **Fixed "13" recognition bug**: D1 position now correctly calculated from accumulated widths + - Loop accumulates widths: digit3 → digit2 → digit1 + - D1 check position = w - digit3Width - digit2Width - 5 + - Previously was checking at wrong position when only 2 digits present + +### v3.8 (Code Cleanup & Digit Detection Loop) +- **Added helpers.go**: New file with `logf()` helper function + - Eliminates repetitive `if logger != nil` checks throughout codebase + - Makes logging calls cleaner and more maintainable + - Applied across processor.go, validators.go, and ocr.go +- **Refactored digit detection to use loop**: Replaced repetitive digit checking code with elegant loop + - Uses arrays for digit names, detection results, and widths + - Accumulates width progressively (digit3 → digit2 → digit1) + - More maintainable and easier to understand + - Eliminates code duplication + +### v3.7 (3-Digit Value Calculation Fix) +- **Fixed 3-digit value calculation bug**: Removed score requirement for digit1 detection + - Issue: When `digit1IsOne` detected a "1" digit, code also required template match score > 50% + - Problem: digit1 region includes empty padding, causing low template match scores + - Result: 106 was calculated as 6, 103 as 3, 104 as 4 + - Fix: Trust `hasOneAt()` detection alone - if digit1IsOne is true, use 100 + num2*10 + num3 + - The `hasOneAt()` function already confirms there's a "1" pattern at the correct position + +### v3.6 (Millisecond Timestamps) +- **Added millisecond precision to timestamps**: Changed timestamp format from `15:04:05` to `15:04:05.000` + - Provides more precise timing for frame processing and reading logs + - Helpful for debugging timing issues and frame processing delays + - Format: `[03:03:17.245] SpO2=93%, HR=85 bpm` + +### v3.5 (Frame Rate + Timestamp Validation) +- **Frame rate adjustment**: Changed from 30fps to 15fps camera + - Adjusted processing from every 
6th frame to every 4th frame + - Processing rate: ~3.75fps (was ~5fps) + - Camera was struggling with 30fps, reduced to 15fps +- **Timestamp validation**: Added OCR check of camera timestamp + - Validates timestamp every 10 processed frames (~2.5 seconds) + - Warns if camera time differs by >3 seconds from server + - Uses optimized gosseract library (~22ms per check) + - **Optimizations**: Downscale 50%, grayscale, Otsu threshold, invert, PNG encode + - **Reused OCR client** for major speed boost (was 120ms, now 22ms warm) + - Silent when drift is within ±3 seconds + - Helps detect frozen/lagging streams +- **New file**: `timestamp_ocr.go` - Timestamp extraction and validation + +### v3.4 (Low Confidence Counter) +- **Added low confidence counter**: Tracks and displays frequency of low confidence frames + - New `LowConfidenceCount` field in ProcessingState + - Console displays count when it increments: "Low confidence (#X)" + - Helps identify camera/lighting issues needing attention + - Counter appears for both first detection and retry failures + +### v3.3 (Corruption Detection Fix) +- **Fixed corruption detection**: Direct check prevents invalid frames from reaching stability + - Changed from `validateCorruption()` wrapper to direct `IsCorrupted()` call + - ANY digit = -1 (invalid template match) now immediately returns StatusCorrupted + - Prevents invalid/corrupted frames from being marked as "unstable" + - Ensures clean separation: corruption → silent skip, low confidence → retry + +### v3.2 (Review & Signal Handling) +- **Fixed Ctrl+C handling**: Simplified signal handling to prevent segfaults + - Removed stream.Close() from signal handler (was causing SIGSEGV) + - Stream cleanup now happens naturally on program exit + - Sets closed flag only, checks happen between frame reads +- **Enhanced review.html**: Failed frames now show all digit images + - Corruption, low confidence, and unrecognized frames display digit crops + - Allows reviewing and approving digits 
from failed recognitions + - Red background distinguishes failed frames +- **Silenced corruption detection**: Invalid patterns work quietly + - Corrupted frames no longer appear in console or HTML + - Only logged to file for debugging purposes + - Keeps review focused on actionable items + +### v3.1 (Optimizations) +- **Removed FPS display**: Cleaned up console output + - No more "Stream FPS: X (avg over Y frames)" messages + - Simpler, focused console showing only readings and warnings +- **Negative value handling**: Unrecognized digits now trigger immediate retry + - SpO2 or HR < 0 (OCR returned -1) treated as low confidence + - Triggers immediate next frame read + layout re-detection + - Prevents false "unstable" warnings for blurry/unrecognized frames +- **Frame rate adjustment**: Changed from every 4th to every 6th frame + - ~5 fps processing rate from 30fps stream + - Better balance for 30fps camera (was tuned for 15fps) + +### v3.0 (Refactored) +- **Modular architecture**: Split monolithic pulse-monitor.go into focused modules + - processor.go: Frame processing orchestration + - validators.go: Validation logic + - frame_source.go: Abstracted sources (RTSP/file) + - types.go: Shared data structures + - normalize.go: Frame normalization +- **Smart escalation strategy**: Progressive failure handling + - 1st failure: Try next frame (0s wait) + - 2nd failure: Re-detect layout (0s wait) + - 3rd failure: Wait 10s + - 4th failure: Wait 30s + - 5th+ failures: Wait 60s + - Success resets counter +- **Interruptible sleep**: Ctrl+C works immediately during waits + - Checks `source.IsActive()` every second + - No more forced 60s waits before shutdown +- **ConsecutiveFailures counter**: Tracks problems across processing + - Incremented on layout failures, corruption, low confidence + - Reset on successful processing + - Enables intelligent escalation +- See V3_CHANGES.md for complete migration guide + +### v2.35 +- **Robust layout detection under varying light 
conditions** + - Changed Step 3 logic: instead of picking "2 largest boxes", find rightmost X in each half + - Splits detection area at X-center, finds max(rightX) for left half (SpO2) and right half (HR) + - Handles digit fragmentation: even if "95" breaks into "9" and "5" contours, still finds correct right edge + - Tracks Y range across all contours in each half for proper bounding +- **Raw frame archiving** + - Saves every processed frame to `raw_frames/raw_YYYYMMDD-NNNNN.png` (persistent directory) + - Enables offline testing and replay of detection algorithms + - Files are the rotated frames (after timestamp crop), exactly as fed to detection pipeline + - Not cleaned on restart - accumulates for historical analysis + +### v2.34 +- Every 4th frame processing (improved responsiveness) +- Simplified corruption handling (no immediate retry) +- Silent corruption detection (log only) +- Digit 3 detection offset: -8 pixels +- Invalid template: 2 patterns + +### Previous versions +- v2.33: Added hindsight validation for large deltas +- v2.32: Implemented low confidence retry with layout re-detection +- v2.31: Added invalid digit detection +- Earlier: Various contour detection and template matching improvements + +--- + +## Contact & Context + +**Development Environment:** macOS (local development and testing) +**Target Deployment:** Ubuntu 22.04 with Docker +**Home Assistant Version:** (check your setup) +**Camera:** (specify your RTSP camera model) + +**Project Location:** `/Users/johanjongsma/pulse-monitor/` + +--- + +## Quick Reference + +### Start monitoring +```bash +cd /Users/johanjongsma/pulse-monitor +./pulseox-monitor +``` + +### Stop and generate review +``` +Ctrl+C +# Opens review/review.html automatically +``` + +### Check logs +```bash +tail -f pulseox-monitor_*.log +``` + +### Approve training digit +```bash +# Move good digit image to training_digits/ +# Naming: {digit}_{variant}.png +``` + +### Mark as invalid/corruption +```bash +# Move bad pattern 
to training_digits/invalid/ +# Naming: descriptive name like "white_blob_2.png" +``` + +--- + +**Document Purpose:** This document serves as a comprehensive reference for understanding the project state, architecture, and reasoning. It should enable a new chat session (or developer) to quickly understand what's been built, why decisions were made, and how to continue development. + +**Last Verified Working:** November 17, 2025 (v3.57 - True raw frame saving) diff --git a/backups/backup_20251125/Systematic_Review_MRI_Brain_Sophia_REVISED.md b/backups/backup_20251125/Systematic_Review_MRI_Brain_Sophia_REVISED.md new file mode 100644 index 0000000..d000804 --- /dev/null +++ b/backups/backup_20251125/Systematic_Review_MRI_Brain_Sophia_REVISED.md @@ -0,0 +1,328 @@ +# Systematic Review of MRI Brain Report for Sophia + +**Patient:** Sophia Helena Jongsma +**Date of Birth:** January 1, 2017 +**Date of MRI:** May 6, 2022 +**Date of Review:** November 10, 2025 +**Original Radiologist:** Javier Quintana, MD + +--- + +## Author's Note + +This document was written by Johan Jongsma (Sophia's father) with the assistance of Claude Opus AI. While I am not a medical professional, I have strong analytical skills that I've applied to understanding my daughter's condition through systematic observation and reasoning. The MRI analysis referenced in this document was performed by Claude Opus AI based on the original radiology reports from May 6, 2022. + +--- + +## Original Radiologist's Report + +> IMPRESSION: In light of the history, imaging findings favored to reflect a degree of global hypoxic injury as discussed in detail. +> +> Indication: >72 hours post-cardiac arrest. Please evaluate Comparison Study Date: 5/6/2022 Technique: Multiplanar multisequence imaging of the brain was performed with and without contrast. +> +> Findings: There is moderate opacification of the paranasal sinuses.1 There is bilateral mastoid fluid.2 The brain is abnormal. 
Extensive areas of T2 prolongation are identified within the posterior upper cervical cord and brainstem3, midbrain, periaqueductal gray, thalami, basal ganglia4-7. Abnormal increased T2 signal with swelling is identified throughout much of the supratentorial cortex more noticeable in the parietal and occipital regions. This does affect the temporal and frontal lobes as well.8 Diffusion-weighted images are abnormal demonstrating restricted water motion along much of the cortical abnormalities near the gray-white matter junction. Restricted water motion within the basal ganglia and thalami is also noted to a degree. Involvement of some of the white matter is not entirely excluded as well.9-11 The lateral ventricles are symmetric and not particularly dilated and similar to this morning CT.12 The cavum septum pellucidum seen on the 5/2/2020 examination is however no longer visualized suggesting a degree of mass effect.13 However, suprasellar cisterns remain patent with probably similar appearance of the medial temporal lobes and cerebellar tonsils when accounting for the differences technique. The major intracranial flow-voids are grossly preserved. The cerebellar tonsils are rounded appearing and above the level of foramen magnum and again similar.14-16 The adenohypophysis is somewhat thin appearing which may be within anatomic range. Neurohypophysis is orthotopic. Corpus callosum is present. Subcutaneous edema is questioned within the face. As seen on the recent MRI, there does appear to be a degree of increased enhancement within the sulci bilaterally. However, given the underlying changes within the brain, this is favored to be related to congestion rather than leptomeningeal disease. Some degree of enhancement is questioned within the midbrain and periaqueductal gray as well as the inferior colliculus.17-23 + +**Note:** Superscript numbers have been added to the original report text for reference in the detailed analysis that follows. 
+ +--- + +## CRITICAL DISCOVERY #1: Temporal Evolution of Cisterna Magna Enlargement + +### Comparative CT Analysis: May 2, 2022 vs May 6, 2022 + +A systematic comparison of CT scans reveals that the most important finding was missed entirely by the radiologist: **the massive cisterna magna enlargement developed progressively over the 4 days following the incident.** + +#### May 2, 2022 CT (8:31 PM - Day of Incident) + +**Observations:** +- **Normal-sized posterior fossa CSF spaces** +- Brainstem in normal anatomic position +- No evidence of enlarged cisterna magna +- Cerebellar hemispheres normal in size and position +- No anterior compression or flattening of brainstem + +#### May 6, 2022 CT (8:49 AM - Four Days Post-Incident) + +**Observations:** +- **Massive enlargement of posterior fossa CSF space (cisterna magna)** +- Brainstem compressed and displaced anteriorly +- Cerebellar hemispheres pushed laterally by CSF accumulation +- Clear evidence of progressive mass effect + +#### May 6, 2022 MRI (Same Day as Second CT) + +**Observations:** +- Confirms massive cisterna magna enlargement +- Brainstem mechanically compressed and flattened +- FLAIR sequences show BLACK signal (CSF), NOT tissue edema + +### Clinical Significance + +This temporal sequence establishes that the enlarged cisterna magna is: + +1. **ACQUIRED** - Not present immediately after the incident +2. **PROGRESSIVE** - Developed over a 4-day period +3. **STRUCTURAL** - Represents CSF accumulation, not hypoxic tissue injury +4. **MECHANICAL** - Causing brainstem compression through mass effect + +--- + +## CRITICAL DISCOVERY #2: Acquired Cerebellar Tonsil Herniation + +### Comparative Analysis: May 2, 2022 vs May 6, 2022 + +In addition to the cisterna magna enlargement, systematic comparison reveals **progressive descent of the cerebellar tonsils** between May 2 and May 6, 2022. 
+ +#### May 2, 2022 CT + +**Observations:** +- Cerebellar tonsils in **normal position** relative to foramen magnum +- Normal tapered configuration of tonsils +- No tonsillar herniation + +#### May 6, 2022 CT + +**Observations:** +- Cerebellar tonsils showing **rounded/blunted contour** +- Beginning descent toward foramen magnum + +#### May 6, 2022 MRI (SAG T1, slice 15/29) + +**Observations:** +- Cerebellar tonsils **descended/herniated** into or through the foramen magnum opening +- Acquired Chiari-like malformation +- Tonsils being compressed downward by the massive cisterna magna above + +### Clinical Significance of Tonsillar Herniation + +The progressive tonsillar descent is critical because: + +1. **Acquired, not congenital** - This is NOT a pre-existing Chiari malformation, but an acquired herniation caused by the enlarging cisterna magna pushing the cerebellum downward + +2. **CSF flow obstruction** - Herniated tonsils can block CSF circulation at the foramen magnum, potentially contributing to or perpetuating the cisterna magna enlargement + +3. **Direct neural compression** - Tonsils compress the brainstem and upper spinal cord, explaining: + - Autonomic dysfunction (breathing, heart rate abnormalities) + - Positional symptoms (inability to tolerate right-side positioning) + - Progressive neurological deterioration + +4. **Surgically addressable** - Posterior fossa decompression surgery can: + - Relieve tonsillar compression + - Restore CSF flow + - Address the cisterna magna + - Potentially reverse or improve symptoms + +### Clinical Context + +According to the patient's father, the medical team urgently performed imaging on May 6 because they were concerned about worsening pressure on the brainstem/spinal cord. 
The clinical team observed progressive neurological deterioration over those 4 days - which correlates directly with the progressive structural changes documented here: both the cisterna magna enlargement AND the tonsillar herniation were actively worsening. + +**These findings completely change the interpretation of Sophia's condition.** + +--- + +## Detailed Finding-by-Finding Analysis of May 6 MRI Report + +### Finding #1-2: Paranasal sinus opacification and mastoid fluid + +**Sequences reviewed:** AX T1 (slice 1/25) +**Observation:** Confirmed presence of fluid in sinuses and mastoid air cells +**Assessment:** Accurate but clinically irrelevant. Common in intubated ICU patients. + +### Finding #3: "T2 prolongation in posterior upper cervical cord and brainstem" + +**Sequences reviewed:** SAG T1 (15/29), AX T2 (7/25), AX FLAIR (7/25) +**Observation:** +- Massive CSF space posterior to brainstem (enlarged cisterna magna) +- Brainstem mechanically compressed/flattened anteriorly +- FLAIR shows BLACK signal confirming CSF, NOT tissue edema +- **May 2 CT shows this was NOT present initially** + +**Assessment:** **FUNDAMENTAL MISINTERPRETATION** - This is an acquired structural CSF enlargement that developed over 4 days, NOT hypoxic tissue injury as claimed. + +### Finding #4-7: Midbrain, periaqueductal gray, thalami, basal ganglia changes + +**Sequences reviewed:** AX T2 (10,12/25), AX FLAIR (8,9/25) +**Observation:** +- Some T2 hyperintensity confirmed in deep gray structures +- FLAIR confirms tissue changes (not just CSF effect) +- Pattern consistent with some injury but not extensive + +**Assessment:** Partially accurate, changes present but must be interpreted in context of progressive mechanical compression rather than purely hypoxic etiology. 
+ +### Finding #8: "Cortical swelling throughout supratentorial regions" + +**Sequences reviewed:** AX FLAIR (10,12,14,20,22,24/25) +**Observation:** +- Sulci remain OPEN and visible +- Gray-white differentiation preserved +- Minimal signal changes, NO significant swelling + +**Assessment:** **GROSSLY OVERSTATED** - Minimal changes present, no significant swelling evident. + +### Finding #9-11: "Restricted diffusion in cortex and deep gray matter" + +**Sequences reviewed:** T2 weighted trace B1000 (13,23,44/56), Average DCTENSOR B1000 (13,23/56), Fractional Anisotropy B1000 (13,23/56) +**Observation:** +- DWI images essentially NEGATIVE - no significant restricted diffusion +- FA maps show preserved white matter tract integrity + +**Assessment:** **DEMONSTRABLY FALSE** - No evidence of extensive restricted diffusion. This directly contradicts the claim of severe acute hypoxic-ischemic injury. + +### Finding #12: Ventricles symmetric, not dilated + +**Assessment:** Accurate, normal finding. + +### Finding #13: Cavum septum pellucidum comparison to "5/2/2020" + +**Critical Issue:** References exam from "5/2/2020" - TWO YEARS before accident occurred (5/2/2022). Presumably meant to reference May 2, 2022 CT. +**Assessment:** **MAJOR ERROR** - Wrong date by exactly 2 years, indicating severe inattention to detail and destroying report credibility. + +### Finding #14-16: "Cerebellar tonsils rounded appearing and above the level of foramen magnum" + +**Sequences reviewed:** SAG T1 (especially slice 15/29) +**Observation:** +- Radiologist noted "rounded" appearance and claimed tonsils "above foramen magnum" +- Direct comparison with May 2 CT shows cerebellar tonsils were in normal position on May 2 +- May 6 MRI shows cerebellar tonsils have **descended/herniated** into or through the foramen magnum +- **This is acquired tonsillar herniation, NOT a stable finding** + +**Assessment:** **CRITICAL MISINTERPRETATION** - The radiologist noted abnormal tonsil appearance but: +1. 
Failed to recognize this as an acquired change (comparing to May 2 imaging) +2. Mischaracterized the tonsil position as "above foramen magnum" when MRI shows herniation +3. Missed the clinical significance of progressive tonsillar descent +4. This is acquired Chiari-like malformation from progressive compression, not a pre-existing condition + +### Finding #17-23: Various minor/questioned findings + +**Assessment:** Multiple vague "questioned" findings showing uncertainty throughout report. + +--- + +## Summary of Errors + +| Finding | Radiologist's Claim | Actual Finding | Clinical Impact | +|---------|-------------------|----------------|-----------------| +| **Timeline recognition** | Not addressed | **Cisterna magna AND tonsils changed May 2-6** | **Critical missed findings** | +| **Posterior brainstem "T2 prolongation"** | Hypoxic injury | **Acquired enlarged cisterna magna** | **Fundamental misdiagnosis** | +| **Cerebellar tonsils** | "Rounded, above foramen magnum" | **Acquired herniation/descent** | **Missed progressive compression** | +| Deep gray matter changes | Present | Partially accurate | Some changes present | +| **Cortical "swelling"** | Extensive | **Minimal - Exaggerated** | Overstated severity | +| **Restricted diffusion** | Extensive | **FALSE - DWI negative** | **Major error contradicting hypoxic diagnosis** | +| **Date reference** | 5/2/2020 exam | **Wrong by 2 years** | **Credibility destroyed** | +| Structural findings | Various | Misinterpreted | Mechanical vs hypoxic confusion | + +--- + +## What the Report SHOULD Have Said + +> **IMPRESSION:** Serial imaging demonstrates progressive structural abnormalities in the posterior fossa developing between May 2 and May 6, 2022, including marked enlargement of the cisterna magna and acquired cerebellar tonsillar herniation, resulting in brainstem and spinal cord compression. 
The absence of significant restricted diffusion on DWI sequences argues strongly against extensive acute hypoxic-ischemic injury as the primary pathology. Mild T2 signal changes in deep gray structures may represent limited injury, but the dominant findings are progressive structural/mechanical pathology rather than hypoxic injury. +> +> **COMPARISON:** CT brain May 2, 2022 shows normal posterior fossa anatomy with normal cerebellar tonsil position. Current study (May 6, 2022) demonstrates dramatic interval development of: +> 1. Markedly enlarged cisterna magna +> 2. Acquired cerebellar tonsillar herniation +> +> **KEY FINDINGS:** +> +> 1. **Progressive cisterna magna enlargement** - Developed between May 2 and May 6, 2022 (documented on serial CT) +> 2. **Acquired cerebellar tonsillar herniation** - Tonsils descended from normal position (May 2) to herniation through foramen magnum (May 6), creating Chiari-like malformation +> 3. **Acquired brainstem compression** - Anterior displacement and flattening of brainstem due to posterior CSF accumulation +> 4. **CSF flow obstruction** - Herniated tonsils blocking CSF circulation at foramen magnum level +> 5. Mild T2 hyperintensity in bilateral thalami and basal ganglia without corresponding restricted diffusion +> 6. **No significant cortical edema or swelling** - sulci remain visible, gray-white differentiation preserved +> 7. **No evidence of extensive acute hypoxic-ischemic injury** on diffusion-weighted imaging +> 8. Preserved white matter tract integrity on fractional anisotropy maps +> +> **CLINICAL CORRELATION:** +> The progressive nature of both the cisterna magna enlargement and tonsillar herniation over 4 days, accompanied by clinical deterioration prompting urgent imaging, suggests an active structural process rather than immediate traumatic or hypoxic injury. 
The mechanism may represent: +> - Post-traumatic arachnoid disruption with progressive CSF accumulation +> - Impaired CSF circulation/resorption exacerbated by tonsillar herniation +> - Progressive posterior fossa pressure changes causing secondary tonsillar descent +> +> **RECOMMENDATION:** URGENT neurosurgical consultation for evaluation of: +> 1. Acquired cisterna magna enlargement +> 2. Acquired cerebellar tonsillar herniation (Chiari-like malformation) +> 3. Brainstem and spinal cord compression +> +> The progressive nature of these findings over 4 days with accompanying clinical deterioration suggests an evolving process requiring prompt neurosurgical assessment. Posterior fossa decompression may be indicated to: +> - Relieve tonsillar herniation +> - Restore CSF flow +> - Decompress brainstem +> - Address cisterna magna +> - Potentially reverse or improve neurological status + +--- + +## Conclusion + +This systematic review reveals fundamental errors in the original radiological interpretation that completely missed TWO critical progressive findings: + +### Primary Findings: Progressive Acquired Structural Abnormalities + +**1. The enlarged cisterna magna developed between May 2 and May 6, 2022.** +**2. Cerebellar tonsil herniation developed between May 2 and May 6, 2022.** + +This is documented by direct comparison of serial imaging showing: +- Normal posterior fossa anatomy on May 2, 2022 (day of incident) +- Normal cerebellar tonsil position on May 2, 2022 +- Massive cisterna magna enlargement by May 6, 2022 (4 days later) +- Cerebellar tonsillar herniation by May 6, 2022 (4 days later) + +This temporal evolution establishes that we are dealing with **acquired, progressive structural processes** - not pre-existing anatomical variants, and not immediate traumatic injury. + +### Critical Errors in Original Report + +1. 
**Failure to recognize temporal evolution:** The radiologist had access to the May 2 CT for comparison but failed to identify or comment on the dramatic interval development of BOTH the cisterna magna enlargement AND the cerebellar tonsillar herniation. + +2. **Misidentification of pathology:** The most striking findings - a progressively enlarging cisterna magna causing brainstem compression and progressive tonsillar herniation - were misinterpreted as "T2 prolongation" from hypoxic injury and static "rounded" tonsils. The FLAIR sequences clearly show BLACK signal (CSF), not the bright signal expected from tissue edema. + +3. **False diffusion findings:** The report claimed "extensive restricted diffusion" throughout the brain. Direct review of DWI sequences shows they are essentially negative. This directly contradicts the diagnosis of severe acute hypoxic-ischemic injury. + +4. **Basic factual errors:** The report references a comparison study from "5/2/2020" - exactly two years before the actual incident. This appears to be a typo for 5/2/2022, but such an error in a critical report indicates severe inattention to detail. + +5. **Exaggeration of findings:** Claimed "extensive swelling" where minimal changes exist. + +6. **Mischaracterization of tonsillar position:** Described tonsils as "above the level of foramen magnum" when imaging shows herniation into or through the foramen magnum. + +### Clinical Implications + +**Sophia has acquired, progressive posterior fossa pathology consisting of:** +1. **Cisterna magna enlargement** causing brainstem compression +2. 
**Cerebellar tonsillar herniation** (acquired Chiari-like malformation) causing: + - CSF flow obstruction at foramen magnum + - Direct brainstem and upper spinal cord compression + - Autonomic dysfunction + - Positional symptoms + +**These conditions:** +- Developed progressively over 4 days following the incident +- Represent structural/mechanical problems rather than purely hypoxic injury +- May be amenable to neurosurgical intervention (posterior fossa decompression) +- Have completely different prognostic implications than diffuse hypoxic-ischemic injury + +**The absence of significant restricted diffusion on DWI sequences argues strongly against extensive acute hypoxic-ischemic injury as the primary diagnosis.** The radiologist's conclusion of "global hypoxic injury" is not supported by the imaging findings and completely misses the progressive structural pathology that is the dominant finding. + +### Mechanism of Injury: Implications + +The progressive development of both cisterna magna enlargement and tonsillar herniation between May 2 and May 6 suggests several possible mechanisms: + +1. **Post-traumatic CSF accumulation** - Disruption of arachnoid membranes or posterior fossa structures allowing CSF to accumulate, with increasing pressure causing downward cerebellar displacement and tonsillar herniation + +2. **Impaired CSF circulation** - Damage to CSF resorption pathways leading to focal accumulation, with subsequent mass effect causing tonsillar descent and further CSF flow obstruction, creating a progressive cycle + +3. **Combined mechanism** - Initial CSF accumulation leading to tonsillar herniation, which then further impairs CSF flow and perpetuates the enlargement + +The key point is that these are **progressive, acquired processes that may be reversible or amenable to treatment** - fundamentally different from the fixed, irreversible hypoxic injury diagnosis that was given. 
+ +**This analysis fundamentally changes the understanding of Sophia's condition and has profound implications for both prognosis and treatment options, including the critical potential role for neurosurgical intervention to address:** +- Mechanical brainstem compression from cisterna magna +- Cerebellar tonsillar herniation and CSF flow obstruction +- Progressive posterior fossa pathology that was actively worsening and prompted urgent clinical concern + +The medical team's urgent imaging on May 6 due to concerns about "worsening pressure" was clinically appropriate - they were observing the effects of these progressive structural changes. The tragedy is that the radiologist completely missed documenting this progressive pathology, instead attributing everything to irreversible hypoxic injury. + +--- + +**Document prepared by:** Johan Jongsma +**Date:** November 10, 2025 +**Assistance:** Claude Opus AI +**Purpose:** Medical consultation with neurology specialist diff --git a/backups/backup_20251125/build.sh b/backups/backup_20251125/build.sh new file mode 100755 index 0000000..e20a104 --- /dev/null +++ b/backups/backup_20251125/build.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# Build script for pulseox-monitor v3.10 + +echo "=== Building PulseOx Monitor v3.10 (Corruption Frame Number) ===" +echo "" + +# List of all source files needed +FILES="pulseox-monitor.go types.go helpers.go frame_source.go processor.go validators.go ocr.go layout_detection.go config.go homeassistant.go html_report.go timestamp_ocr.go" + +# Check if all files exist +MISSING=0 +for file in $FILES; do + if [ ! -f "$file" ]; then + echo "āŒ Missing file: $file" + MISSING=1 + fi +done + +if [ $MISSING -eq 1 ]; then + echo "" + echo "Error: Missing source files. Please ensure all files are present." + exit 1 +fi + +echo "āœ“ All source files present" +echo "" +echo "Compiling..." + +# Build +go build -o pulseox-monitor $FILES + +if [ $? 
-eq 0 ]; then + echo "" + echo "āœ“ Build successful: pulseox-monitor" + echo "" + echo "Test with:" + echo " ./pulseox-monitor raw_frames/raw_20251028-00123.png # Single frame" + echo " ./pulseox-monitor # Live stream" + echo "" +else + echo "" + echo "āŒ Build failed" + exit 1 +fi diff --git a/backups/backup_20251125/config.go b/backups/backup_20251125/config.go new file mode 100644 index 0000000..4c74768 --- /dev/null +++ b/backups/backup_20251125/config.go @@ -0,0 +1,41 @@ +package main + +import ( + "fmt" + "os" + + "gopkg.in/yaml.v3" +) + +type Config struct { + Camera struct { + RTSPURL string `yaml:"rtsp_url"` + } `yaml:"camera"` + HomeAssistant struct { + URL string `yaml:"url"` + Token string `yaml:"token"` + } `yaml:"home_assistant"` + Processing struct { + SampleInterval int `yaml:"sample_interval"` + ChangeThresholdPercent int `yaml:"change_threshold_percent"` + MaxDriftSeconds int `yaml:"max_drift_seconds"` + } `yaml:"processing"` + Logging struct { + Level string `yaml:"level"` + File string `yaml:"file"` + } `yaml:"logging"` +} + +func LoadConfig(filename string) (*Config, error) { + data, err := os.ReadFile(filename) + if err != nil { + return nil, fmt.Errorf("failed to read config file: %w", err) + } + + var config Config + if err := yaml.Unmarshal(data, &config); err != nil { + return nil, fmt.Errorf("failed to parse config file: %w", err) + } + + return &config, nil +} diff --git a/backups/backup_20251125/config.yaml b/backups/backup_20251125/config.yaml new file mode 100644 index 0000000..edfe6fc --- /dev/null +++ b/backups/backup_20251125/config.yaml @@ -0,0 +1,25 @@ +# Pulse Oximeter Monitor Configuration + +camera: + # Tapo C110 RTSP URL (pulse ox monitoring camera) + rtsp_url: "rtsp://tapohass:!!Helder06@192.168.2.183:554/stream1" + +home_assistant: + # TS140 server IP where HASS is hosted + url: "http://192.168.1.252:8123" + # Long-lived access token for pulse-monitor + token: 
"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiIzOTMxMTA4MjczYzI0NDU1YjIzOGJlZWE0Y2NkM2I1OCIsImlhdCI6MTc2MTExNTQxNywiZXhwIjoyMDc2NDc1NDE3fQ.URFS4M0rX78rW27gQuBX-PyrPYMLlGujF16jIBHXYOw" + +processing: + # How often to capture and process frames (seconds) + sample_interval: 1 + # Reject readings that change more than this percent + change_threshold_percent: 5 + # Maximum allowed drift INCREASE in seconds (detects camera lag/freeze) + # Camera can be 10s behind server consistently - that's fine + # But if drift increases by more than this, frame is stale + max_drift_seconds: 3 + +logging: + level: "info" # debug, info, warn, error + file: "./logs/pulse-monitor.log" \ No newline at end of file diff --git a/backups/backup_20251125/create_backup.sh b/backups/backup_20251125/create_backup.sh new file mode 100755 index 0000000..46b1f41 --- /dev/null +++ b/backups/backup_20251125/create_backup.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# Backup script for pulse-monitor before refactoring +# Created: 2025-10-30 + +BACKUP_DIR="backups/backup_20251030_043342" +mkdir -p "$BACKUP_DIR" + +# Copy all source files +cp pulse-monitor.go "$BACKUP_DIR/" +cp ocr.go "$BACKUP_DIR/" +cp layout_detection.go "$BACKUP_DIR/" +cp normalize.go "$BACKUP_DIR/" +cp config.go "$BACKUP_DIR/" +cp homeassistant.go "$BACKUP_DIR/" +cp html_report.go "$BACKUP_DIR/" +cp PROJECT_STATE.md "$BACKUP_DIR/" +cp config.yaml "$BACKUP_DIR/" + +echo "āœ“ Backup completed to $BACKUP_DIR" +ls -lh "$BACKUP_DIR" diff --git a/backups/backup_20251125/detection.go b/backups/backup_20251125/detection.go new file mode 100644 index 0000000..d131962 --- /dev/null +++ b/backups/backup_20251125/detection.go @@ -0,0 +1,1035 @@ +package main + +import ( + "fmt" + "image" + "image/color" + "math" + "sort" + + "gocv.io/x/gocv" +) + +// Global DEBUG flag is declared in test_rotation.go + +// Band represents a vertical band of contours +type Band struct { + minX int + maxX int + minY int + maxY int +} + +// DetectBands finds and groups 
contours into vertical bands +// Only returns bands with height >= minHeight pixels +func DetectBands(binary gocv.Mat, minHeight int) []Band { + type Box struct { + minX int + maxX int + minY int + maxY int + } + + // Find contours + contours := gocv.FindContours(binary, gocv.RetrievalExternal, gocv.ChainApproxSimple) + + // Get all bounding boxes + boxes := []Box{} + for i := 0; i < contours.Size(); i++ { + rect := gocv.BoundingRect(contours.At(i)) + boxes = append(boxes, Box{ + minX: rect.Min.X, + maxX: rect.Max.X, + minY: rect.Min.Y, + maxY: rect.Max.Y, + }) + } + + // Sort boxes by minY + sort.Slice(boxes, func(i, j int) bool { + return boxes[i].minY < boxes[j].minY + }) + + // Group overlapping/adjacent boxes into vertical bands + bands := []Band{} + + for _, box := range boxes { + // Skip boxes shorter than minHeight + height := box.maxY - box.minY + if height < minHeight { + continue + } + + if len(bands) == 0 { + // First band + bands = append(bands, Band{ + minX: box.minX, + maxX: box.maxX, + minY: box.minY, + maxY: box.maxY, + }) + } else { + lastBand := &bands[len(bands)-1] + + // Check if this box overlaps or is close to last band (within 5px) + if box.minY <= lastBand.maxY+5 { + // Merge: extend the band in both X and Y + if box.minX < lastBand.minX { + lastBand.minX = box.minX + } + if box.maxX > lastBand.maxX { + lastBand.maxX = box.maxX + } + if box.maxY > lastBand.maxY { + lastBand.maxY = box.maxY + } + } else { + // New band + bands = append(bands, Band{ + minX: box.minX, + maxX: box.maxX, + minY: box.minY, + maxY: box.maxY, + }) + } + } + } + + return bands +} + +// findBaseline analyzes the baseline in the graph band +// Returns left point, right point, width, number of sections, and error +func findBaseline(binary gocv.Mat, graphBand Band) (*image.Point, *image.Point, int, int, error) { + // Extract just the graph band region + graphRegion := binary.Region(image.Rect(0, graphBand.minY, binary.Cols(), graphBand.maxY+1)) + + // Find all contours 
in graph region + contours := gocv.FindContours(graphRegion, gocv.RetrievalExternal, gocv.ChainApproxSimple) + + // Get sections (contours) with width >= 5px + type Section struct { + minX int + maxX int + minY int + maxY int + } + sections := []Section{} + + for i := 0; i < contours.Size(); i++ { + rect := gocv.BoundingRect(contours.At(i)) + width := rect.Max.X - rect.Min.X + height := rect.Max.Y - rect.Min.Y + // Filter: width >= 5px AND height >= 80px + if width >= 5 && height >= 80 { + sections = append(sections, Section{ + minX: rect.Min.X, + maxX: rect.Max.X, + minY: rect.Min.Y + graphBand.minY, // Adjust to full image coordinates + maxY: rect.Max.Y + graphBand.minY, + }) + } + } + + // Check if we have exactly 2 sections + if len(sections) != 2 { + return nil, nil, 0, len(sections), fmt.Errorf("found %d sections (need exactly 2)", len(sections)) + } + + // Sort sections by X position (left to right) + sort.Slice(sections, func(i, j int) bool { + return sections[i].minX < sections[j].minX + }) + + // Horizontal width (left edge of left section to right edge of right section) + horizontalWidth := sections[1].maxX - sections[0].minX + + // Calculate scale factor for offset (target 860px) + scale := 860.0 / float64(horizontalWidth) + offset := int(3.0 * scale) + if offset < 3 { + offset = 3 + } + + // Find rotation points + // Left section: lowest white pixel offset from left edge + leftSection := sections[0] + leftX := leftSection.minX + offset + leftPoint := findLowestWhitePixel(binary, leftX, graphBand.minY, graphBand.maxY) + + // Right section: lowest white pixel offset from right edge + rightSection := sections[1] + rightX := rightSection.maxX - offset + rightPoint := findLowestWhitePixel(binary, rightX, graphBand.minY, graphBand.maxY) + + if leftPoint == nil || rightPoint == nil { + return nil, nil, 0, len(sections), fmt.Errorf("could not find rotation endpoints") + } + + return leftPoint, rightPoint, horizontalWidth, len(sections), nil +} + +// 
findLowestWhitePixel finds the lowest (max Y) white pixel at given X within Y range +func findLowestWhitePixel(binary gocv.Mat, x int, minY int, maxY int) *image.Point { + lowestY := -1 + for y := minY; y <= maxY; y++ { + if binary.GetUCharAt(y, x) == 255 { + lowestY = y + } + } + if lowestY == -1 { + return nil + } + return &image.Point{X: x, Y: lowestY} +} + +// DetectionResult contains rotation and width detection results +type DetectionResult struct { + Rotation float64 + Width int + ScaleFactor float64 // Scale factor to apply (860 / width) + SpO2 image.Rectangle // SpO2 display area in SCALED, ROTATED coordinates + HR image.Rectangle // HR display area in SCALED, ROTATED coordinates + Success bool +} + +// PreprocessFrame takes a raw frame and prepares it for detection +// Returns the processed binary image +func PreprocessFrame(frame gocv.Mat) gocv.Mat { + // Crop top 68 pixels (timestamp) + cropped := frame.Region(image.Rect(0, 68, frame.Cols(), frame.Rows())) + + // Rotate 90° clockwise + rotated := gocv.NewMat() + gocv.Rotate(cropped, &rotated, gocv.Rotate90Clockwise) + + // Convert to grayscale + gray := gocv.NewMat() + gocv.CvtColor(rotated, &gray, gocv.ColorBGRToGray) + rotated.Close() + + // Threshold to binary + binary := gocv.NewMat() + gocv.Threshold(gray, &binary, 170, 255, gocv.ThresholdBinary) + gray.Close() + + return binary +} + +// DetectRotationAndWidth analyzes a binary frame and returns rotation angle, width, and display areas +func DetectRotationAndWidth(binary gocv.Mat) DetectionResult { + // ========== PHASE 1: Detect rotation & scale from baseline ========== + + // Detect bands (using 80px minimum height for original scale) + bands := DetectBands(binary, 80) + + // Check if we have enough bands + if len(bands) < 2 { + if DEBUG { + fmt.Printf("DEBUG: Detection failed - only %d bands found (need at least 2)\n", len(bands)) + } + return DetectionResult{Success: false} + } + + // Find TALLEST band (graph/waveform display) + tallestIdx := 0 
+ tallestHeight := 0 + for i, band := range bands { + height := band.maxY - band.minY + if height > tallestHeight { + tallestHeight = height + tallestIdx = i + } + } + + // Check if there's a band after the tallest (for digits) + if tallestIdx >= len(bands)-1 { + if DEBUG { + fmt.Printf("DEBUG: Detection failed - tallest band is last (idx=%d, total=%d)\n", tallestIdx, len(bands)) + } + return DetectionResult{Success: false} + } + + graphBand := bands[tallestIdx] // Tallest = graph/waveform + + // Find baseline + leftPoint, rightPoint, width, sections, err := findBaseline(binary, graphBand) + if err != nil || sections != 2 { + if DEBUG { + if err != nil { + fmt.Printf("DEBUG: Detection failed - baseline error: %v\n", err) + } else { + fmt.Printf("DEBUG: Detection failed - found %d sections (need exactly 2)\n", sections) + } + } + return DetectionResult{Success: false} + } + + // Calculate rotation angle from baseline points + deltaY := float64(rightPoint.Y - leftPoint.Y) + deltaX := float64(rightPoint.X - leftPoint.X) + rotation := math.Atan2(deltaY, deltaX) * 180 / math.Pi + + // DEBUG: Visualize the baseline points + if DEBUG { + viz := gocv.NewMat() + gocv.CvtColor(binary, &viz, gocv.ColorGrayToBGR) + + // Draw left point (green) + gocv.Circle(&viz, *leftPoint, 10, color.RGBA{0, 255, 0, 255}, -1) + // Draw right point (red) + gocv.Circle(&viz, *rightPoint, 10, color.RGBA{0, 0, 255, 255}, -1) + // Draw line between them (yellow) + gocv.Line(&viz, *leftPoint, *rightPoint, color.RGBA{0, 255, 255, 255}, 2) + // Draw graph band boundaries + gocv.Line(&viz, image.Pt(0, graphBand.minY), image.Pt(viz.Cols(), graphBand.minY), color.RGBA{255, 0, 255, 255}, 1) + gocv.Line(&viz, image.Pt(0, graphBand.maxY), image.Pt(viz.Cols(), graphBand.maxY), color.RGBA{255, 0, 255, 255}, 1) + + gocv.IMWrite("debug_baseline_points.png", viz) + viz.Close() + fmt.Println("DEBUG: Saved baseline points visualization to debug_baseline_points.png") + } + + if DEBUG { + fmt.Printf("DEBUG: 
Baseline points: Left(%d,%d) Right(%d,%d)\n", + leftPoint.X, leftPoint.Y, rightPoint.X, rightPoint.Y) + fmt.Printf("DEBUG: Delta Y=%d, Delta X=%d, Rotation=%.3f°\n", + rightPoint.Y-leftPoint.Y, rightPoint.X-leftPoint.X, rotation) + } + + // Calculate scale factor (860 / detected width) + scaleFactor := 860.0 / float64(width) + + if DEBUG { + fmt.Printf("DEBUG: Width=%dpx, Scale factor=%.3f (baseline will become 860px)\n", + width, scaleFactor) + } + + // ========== PHASE 2: Apply transforms ========== + + // Rotate the image to correct tilt + rotated := RotateImage(binary, rotation) + defer rotated.Close() + + // Scale to 860px baseline width + scaled := ScaleByFactor(rotated, scaleFactor) + defer scaled.Close() + + if DEBUG { + fmt.Printf("DEBUG: Transformed image size: %dx%d\n", scaled.Cols(), scaled.Rows()) + } + + // ========== PHASE 3: Detect display areas on transformed image ========== + + // Re-detect bands on scaled image (adjust minHeight for scale) + scaledMinHeight := int(80.0 * scaleFactor) + scaledBands := DetectBands(scaled, scaledMinHeight) + + if len(scaledBands) < 2 { + if DEBUG { + fmt.Printf("DEBUG: Detection failed - only %d bands found on scaled image\n", len(scaledBands)) + } + return DetectionResult{Success: false} + } + + // Find tallest band again on scaled image + scaledTallestIdx := 0 + scaledTallestHeight := 0 + for i, band := range scaledBands { + height := band.maxY - band.minY + if height > scaledTallestHeight { + scaledTallestHeight = height + scaledTallestIdx = i + } + } + + // Check if there's a digit band after the graph band + if scaledTallestIdx >= len(scaledBands)-1 { + if DEBUG { + fmt.Printf("DEBUG: Detection failed - no digit band after graph on scaled image\n") + } + return DetectionResult{Success: false} + } + + digitBand := scaledBands[scaledTallestIdx+1] + + if DEBUG { + fmt.Printf("DEBUG: Digit band on scaled image: Y[%d-%d], Height=%dpx\n", + digitBand.minY, digitBand.maxY, digitBand.maxY-digitBand.minY) + } + + // 
Extract digit band region + digitRegion := ExtractBandRegion(scaled, digitBand) + defer digitRegion.Close() + + // Find digit boxes + digitBoxes := FindDigitBoxes(digitRegion, scaledMinHeight) + + if len(digitBoxes) < 2 { + if DEBUG { + fmt.Printf("DEBUG: Detection failed - only %d digit boxes found (need at least 2)\n", len(digitBoxes)) + } + return DetectionResult{Success: false} + } + + // Merge digit boxes into SpO2/HR areas + displayAreas := MergeDigitBoxesIntoDisplays(digitRegion, digitBoxes) + + // Convert coordinates from digit-region space to full-image space + // (add digitBand.minY offset to Y coordinates) + spo2Rect := image.Rect( + displayAreas.SpO2.Min.X, + displayAreas.SpO2.Min.Y + digitBand.minY, + displayAreas.SpO2.Max.X, + displayAreas.SpO2.Max.Y + digitBand.minY, + ) + hrRect := image.Rect( + displayAreas.HR.Min.X, + displayAreas.HR.Min.Y + digitBand.minY, + displayAreas.HR.Max.X, + displayAreas.HR.Max.Y + digitBand.minY, + ) + + if DEBUG { + fmt.Printf("DEBUG: SpO2 display area (full coords): X[%d-%d] Y[%d-%d]\n", + spo2Rect.Min.X, spo2Rect.Max.X, spo2Rect.Min.Y, spo2Rect.Max.Y) + fmt.Printf("DEBUG: HR display area (full coords): X[%d-%d] Y[%d-%d]\n", + hrRect.Min.X, hrRect.Max.X, hrRect.Min.Y, hrRect.Max.Y) + } + + return DetectionResult{ + Rotation: rotation, + Width: width, + ScaleFactor: scaleFactor, + SpO2: spo2Rect, + HR: hrRect, + Success: true, + } +} + +// VisualizeBands draws bands on an image with labels (only in DEBUG mode) +func VisualizeBands(binary gocv.Mat, bands []Band, tallestIdx int, filename string) { + if !DEBUG { + return + } + + viz := gocv.NewMat() + gocv.CvtColor(binary, &viz, gocv.ColorGrayToBGR) + defer viz.Close() + + red := color.RGBA{R: 255, G: 0, B: 0, A: 255} + green := color.RGBA{R: 0, G: 255, B: 0, A: 255} + blue := color.RGBA{R: 0, G: 0, B: 255, A: 255} + yellow := color.RGBA{R: 255, G: 255, B: 0, A: 255} + + // Draw all bands + for i, band := range bands { + bandColor := red + if i == tallestIdx { + bandColor 
= green // Graph band in green + } else if tallestIdx < len(bands)-1 && i == tallestIdx+1 { + bandColor = blue // Numbers band in blue + } + + rect := image.Rect(0, band.minY, viz.Cols(), band.maxY) + gocv.Rectangle(&viz, rect, bandColor, 2) + + label := fmt.Sprintf("Band #%d", i+1) + gocv.PutText(&viz, label, image.Pt(10, band.minY+20), + gocv.FontHersheyPlain, 1.5, yellow, 2) + } + + gocv.IMWrite(filename, viz) + fmt.Printf("DEBUG: Saved visualization to %s\n", filename) +} + +// VisualizeDigitBand highlights the digit band specifically +func VisualizeDigitBand(binary gocv.Mat, bands []Band, tallestIdx int, filename string) { + if !DEBUG || tallestIdx >= len(bands)-1 { + return + } + + viz := gocv.NewMat() + gocv.CvtColor(binary, &viz, gocv.ColorGrayToBGR) + defer viz.Close() + + red := color.RGBA{R: 255, G: 0, B: 0, A: 255} + green := color.RGBA{R: 0, G: 255, B: 0, A: 255} + blue := color.RGBA{R: 0, G: 0, B: 255, A: 255} + yellow := color.RGBA{R: 255, G: 255, B: 0, A: 255} + cyan := color.RGBA{R: 0, G: 255, B: 255, A: 255} + + // Draw all bands with specific highlighting + for i, band := range bands { + bandColor := red + lineThickness := 1 + labelText := fmt.Sprintf("Band #%d", i+1) + + if i == tallestIdx { + bandColor = green + labelText = "GRAPH BAND" + } else if i == tallestIdx+1 { + // DIGIT BAND - highlight strongly + bandColor = cyan + lineThickness = 3 + labelText = fmt.Sprintf("DIGIT BAND [Y: %d-%d]", band.minY, band.maxY) + } + + rect := image.Rect(0, band.minY, viz.Cols(), band.maxY) + gocv.Rectangle(&viz, rect, bandColor, lineThickness) + + // Add label + labelColor := yellow + if i == tallestIdx+1 { + labelColor = cyan + } + gocv.PutText(&viz, labelText, image.Pt(10, band.minY+20), + gocv.FontHersheyPlain, 1.5, labelColor, 2) + } + + // Draw additional markers for digit band + if tallestIdx < len(bands)-1 { + digitBand := bands[tallestIdx+1] + + // Draw vertical lines at digit band boundaries + for x := 50; x < viz.Cols(); x += 100 { + 
gocv.Line(&viz, + image.Pt(x, digitBand.minY), + image.Pt(x, digitBand.maxY), + blue, 1) + } + + // Add dimension text + height := digitBand.maxY - digitBand.minY + dimText := fmt.Sprintf("Height: %dpx, Width: 860px", height) + gocv.PutText(&viz, dimText, image.Pt(10, digitBand.maxY+30), + gocv.FontHersheyPlain, 1.2, cyan, 2) + } + + gocv.IMWrite(filename, viz) + fmt.Printf("DEBUG: Saved digit band visualization to %s\n", filename) +} + +// ValidateBaselineTwoSegments checks if the baseline consists of two separate segments +// by counting black-to-white transitions along the baseline (should be exactly 1) +func ValidateBaselineTwoSegments(binary gocv.Mat, leftPoint, rightPoint *image.Point) bool { + // Calculate slope for Y interpolation along the baseline + deltaY := float64(rightPoint.Y - leftPoint.Y) + deltaX := float64(rightPoint.X - leftPoint.X) + slope := deltaY / deltaX + + if DEBUG { + fmt.Printf("DEBUG: Validating baseline segments (slope=%.4f) from X=%d to X=%d\n", + slope, leftPoint.X, rightPoint.X) + } + + // Count black-to-white transitions + transitions := 0 + lastWasWhite := false + + for x := leftPoint.X; x <= rightPoint.X; x++ { + // Interpolate Y position along the baseline, then go 2px up (away from thick edge) + y := leftPoint.Y + int(float64(x-leftPoint.X)*slope) - 2 + isWhite := binary.GetUCharAt(y, x) == 255 + + // Detect black-to-white transition + if isWhite && !lastWasWhite { + transitions++ + } + lastWasWhite = isWhite + } + + if DEBUG { + fmt.Printf("DEBUG: Found %d black-to-white transitions\n", transitions) + } + + // We need exactly 1 black-to-white transition for 2 segments + // (entering the second segment after the gap) + if transitions != 1 { + if DEBUG { + fmt.Printf("DEBUG: INVALID - Need exactly 1 transition, found %d\n", transitions) + } + return false + } + + if DEBUG { + fmt.Println("DEBUG: VALID - Baseline has two distinct segments") + } + return true +} + +// RotateImage rotates an image by the given angle in degrees +// 
The angle is the detected rotation that needs to be corrected +func RotateImage(img gocv.Mat, angleDegrees float64) gocv.Mat { + // Get image center + center := image.Point{ + X: img.Cols() / 2, + Y: img.Rows() / 2, + } + + // Apply rotation to correct the tilt + // NOTE: If image rotates wrong direction, change to: -angleDegrees + rotMat := gocv.GetRotationMatrix2D(center, angleDegrees, 1.0) + defer rotMat.Close() + + // Apply rotation + rotated := gocv.NewMat() + gocv.WarpAffine(img, &rotated, rotMat, image.Point{X: img.Cols(), Y: img.Rows()}) + + if DEBUG { + fmt.Printf("DEBUG: Applied rotation: %.3f°\n", angleDegrees) + } + + return rotated +} + +// VisualizeSimpleBands draws just the graph and digit bands clearly +func VisualizeSimpleBands(binary gocv.Mat, graphBand, digitBand Band, filename string) { + if !DEBUG { + return + } + + viz := gocv.NewMat() + gocv.CvtColor(binary, &viz, gocv.ColorGrayToBGR) + defer viz.Close() + + green := color.RGBA{R: 0, G: 255, B: 0, A: 255} + cyan := color.RGBA{R: 0, G: 255, B: 255, A: 255} + + // Draw graph band in green + graphRect := image.Rect(0, graphBand.minY, viz.Cols(), graphBand.maxY) + gocv.Rectangle(&viz, graphRect, green, 2) + gocv.PutText(&viz, "GRAPH BAND", image.Pt(10, graphBand.minY+20), + gocv.FontHersheyPlain, 1.5, green, 2) + + // Draw digit band in cyan with thick border + digitRect := image.Rect(0, digitBand.minY, viz.Cols(), digitBand.maxY) + gocv.Rectangle(&viz, digitRect, cyan, 3) + label := fmt.Sprintf("DIGIT BAND [Y: %d-%d, H: %dpx]", + digitBand.minY, digitBand.maxY, digitBand.maxY-digitBand.minY) + gocv.PutText(&viz, label, image.Pt(10, digitBand.minY+20), + gocv.FontHersheyPlain, 1.5, cyan, 2) + + gocv.IMWrite(filename, viz) + fmt.Printf("DEBUG: Saved 2-band visualization to %s\n", filename) +} + +// ExtractBandRegion extracts a specific band region from an image +func ExtractBandRegion(img gocv.Mat, band Band) gocv.Mat { + // Create a region of interest (ROI) for the band + rect := image.Rect(0, 
band.minY, img.Cols(), band.maxY) + region := img.Region(rect) + + // Clone the region to create an independent Mat + extracted := gocv.NewMat() + region.CopyTo(&extracted) + + if DEBUG { + fmt.Printf("DEBUG: Extracted band region [%d-%d], size: %dx%d\n", + band.minY, band.maxY, extracted.Cols(), extracted.Rows()) + } + + return extracted +} + +// FindDigitBoxes finds individual digit contours within a region +// Returns bounding rectangles for digits larger than minSize +func FindDigitBoxes(digitRegion gocv.Mat, minSize int) []image.Rectangle { + contours := gocv.FindContours(digitRegion, gocv.RetrievalExternal, gocv.ChainApproxSimple) + + var digitBoxes []image.Rectangle + for i := 0; i < contours.Size(); i++ { + rect := gocv.BoundingRect(contours.At(i)) + height := rect.Max.Y - rect.Min.Y + + // Filter by minimum size - height only + if height >= minSize { + digitBoxes = append(digitBoxes, rect) + } + } + + // Sort boxes by X position (left to right) + sort.Slice(digitBoxes, func(i, j int) bool { + return digitBoxes[i].Min.X < digitBoxes[j].Min.X + }) + + if DEBUG { + fmt.Printf("DEBUG: Found %d digit contours (H>=%dpx)\n", len(digitBoxes), minSize) + } + + return digitBoxes +} + +// VisualizeDigitBoxes draws boxes around detected digits with numbers +func VisualizeDigitBoxes(digitRegion gocv.Mat, boxes []image.Rectangle, yOffset int, filename string) { + if !DEBUG { + return + } + + // Create visualization + viz := gocv.NewMat() + gocv.CvtColor(digitRegion, &viz, gocv.ColorGrayToBGR) + defer viz.Close() + + // Colors for digit boxes + red := color.RGBA{R: 255, G: 0, B: 0, A: 255} + yellow := color.RGBA{R: 255, G: 255, B: 0, A: 255} + green := color.RGBA{R: 0, G: 255, B: 0, A: 255} + cyan := color.RGBA{R: 0, G: 255, B: 255, A: 255} + magenta := color.RGBA{R: 255, G: 0, B: 255, A: 255} + + // Array of colors to cycle through + colors := []color.RGBA{red, yellow, green, cyan, magenta} + + // Draw each digit box + for i, box := range boxes { + // Cycle through 
colors + boxColor := colors[i%len(colors)] + + // Draw rectangle around digit + gocv.Rectangle(&viz, box, boxColor, 2) + + // Add digit number label + label := fmt.Sprintf("%d", i+1) + labelPos := image.Pt(box.Min.X+2, box.Min.Y-5) + if labelPos.Y < 10 { + // If label would be outside image, put it inside the box + labelPos.Y = box.Min.Y + 15 + } + gocv.PutText(&viz, label, labelPos, + gocv.FontHersheyPlain, 1.2, boxColor, 2) + + // Show dimensions + width := box.Max.X - box.Min.X + height := box.Max.Y - box.Min.Y + dimText := fmt.Sprintf("%dx%d", width, height) + gocv.PutText(&viz, dimText, image.Pt(box.Min.X, box.Max.Y+12), + gocv.FontHersheyPlain, 0.8, boxColor, 1) + } + + // Add summary text + summary := fmt.Sprintf("Found %d digits (Y offset: %d)", len(boxes), yOffset) + gocv.PutText(&viz, summary, image.Pt(5, viz.Rows()-10), + gocv.FontHersheyPlain, 1.0, green, 1) + + gocv.IMWrite(filename, viz) + fmt.Printf("DEBUG: Saved digit boxes visualization to %s\n", filename) +} + +// VisualizeDigitBoxesOnFull draws digit boxes on the full scaled image +func VisualizeDigitBoxesOnFull(fullImage gocv.Mat, boxes []image.Rectangle, digitBand Band, filename string) { + if !DEBUG { + return + } + + // Create visualization + viz := gocv.NewMat() + gocv.CvtColor(fullImage, &viz, gocv.ColorGrayToBGR) + defer viz.Close() + + // Colors + green := color.RGBA{R: 0, G: 255, B: 0, A: 255} + cyan := color.RGBA{R: 0, G: 255, B: 255, A: 255} + red := color.RGBA{R: 255, G: 0, B: 0, A: 255} + yellow := color.RGBA{R: 255, G: 255, B: 0, A: 255} + magenta := color.RGBA{R: 255, G: 0, B: 255, A: 255} + + colors := []color.RGBA{red, yellow, green, cyan, magenta} + + // Draw the digit band outline + bandRect := image.Rect(0, digitBand.minY, viz.Cols(), digitBand.maxY) + gocv.Rectangle(&viz, bandRect, cyan, 1) + gocv.PutText(&viz, "DIGIT BAND", image.Pt(10, digitBand.minY-5), + gocv.FontHersheyPlain, 1.0, cyan, 1) + + // Draw each digit box (adjust Y position to full image coordinates) + for i, 
box := range boxes { + // Adjust box coordinates to full image (add digitBand.minY offset) + adjustedBox := image.Rectangle{ + Min: image.Pt(box.Min.X, box.Min.Y+digitBand.minY), + Max: image.Pt(box.Max.X, box.Max.Y+digitBand.minY), + } + + // Cycle through colors + boxColor := colors[i%len(colors)] + + // Draw rectangle + gocv.Rectangle(&viz, adjustedBox, boxColor, 2) + + // Add digit number + label := fmt.Sprintf("D%d", i+1) + gocv.PutText(&viz, label, image.Pt(adjustedBox.Min.X+2, adjustedBox.Min.Y+15), + gocv.FontHersheyPlain, 1.0, boxColor, 2) + } + + // Add summary + summary := fmt.Sprintf("Total: %d digits detected", len(boxes)) + gocv.PutText(&viz, summary, image.Pt(10, 30), + gocv.FontHersheyPlain, 1.2, green, 2) + + gocv.IMWrite(filename, viz) + fmt.Printf("DEBUG: Saved full image with digit boxes to %s\n", filename) +} + +// ScaleByFactor scales an image by a specific scale factor +func ScaleByFactor(img gocv.Mat, scaleFactor float64) gocv.Mat { + // Calculate new dimensions + newWidth := int(float64(img.Cols()) * scaleFactor) + newHeight := int(float64(img.Rows()) * scaleFactor) + + // Resize image + scaled := gocv.NewMat() + gocv.Resize(img, &scaled, image.Point{X: newWidth, Y: newHeight}, 0, 0, gocv.InterpolationLinear) + + if DEBUG { + fmt.Printf("DEBUG: Scaled image from %dx%d to %dx%d (scale factor: %.3f)\n", + img.Cols(), img.Rows(), newWidth, newHeight, scaleFactor) + } + + return scaled +} + +// DisplayAreas holds the two display rectangles for SpO2 and HR +type DisplayAreas struct { + SpO2 image.Rectangle + HR image.Rectangle +} + +// mergeDigitBoxesWithCenter merges digit boxes into SpO2/HR displays using a specific centerX +func mergeDigitBoxesWithCenter(digitRegion gocv.Mat, digitBoxes []image.Rectangle, centerX int) DisplayAreas { + // Initialize bounding boxes + spo2MinX, spo2MaxX := digitRegion.Cols(), 0 + spo2MinY, spo2MaxY := digitRegion.Rows(), 0 + hrMinX, hrMaxX := digitRegion.Cols(), 0 + hrMinY, hrMaxY := digitRegion.Rows(), 0 + + // 
Track counts for logging + leftCount := 0 + rightCount := 0 + + // Split boxes by centerX and merge + for _, box := range digitBoxes { + // Calculate box center X to determine which half it's in + boxCenterX := (box.Min.X + box.Max.X) / 2 + + if boxCenterX < centerX { + // LEFT HALF - SpO2 + leftCount++ + if box.Min.X < spo2MinX { + spo2MinX = box.Min.X + } + if box.Max.X > spo2MaxX { + spo2MaxX = box.Max.X + } + if box.Min.Y < spo2MinY { + spo2MinY = box.Min.Y + } + if box.Max.Y > spo2MaxY { + spo2MaxY = box.Max.Y + } + } else { + // RIGHT HALF - HR + rightCount++ + if box.Min.X < hrMinX { + hrMinX = box.Min.X + } + if box.Max.X > hrMaxX { + hrMaxX = box.Max.X + } + if box.Min.Y < hrMinY { + hrMinY = box.Min.Y + } + if box.Max.Y > hrMaxY { + hrMaxY = box.Max.Y + } + } + } + + if DEBUG { + fmt.Printf("DEBUG: Split at center X=%d: %d boxes left (SpO2), %d boxes right (HR)\n", + centerX, leftCount, rightCount) + fmt.Printf("DEBUG: SpO2 merged area: X[%d-%d] Y[%d-%d], Size: %dx%d\n", + spo2MinX, spo2MaxX, spo2MinY, spo2MaxY, + spo2MaxX-spo2MinX, spo2MaxY-spo2MinY) + fmt.Printf("DEBUG: HR merged area: X[%d-%d] Y[%d-%d], Size: %dx%d\n", + hrMinX, hrMaxX, hrMinY, hrMaxY, + hrMaxX-hrMinX, hrMaxY-hrMinY) + } + + return DisplayAreas{ + SpO2: image.Rect(spo2MinX, spo2MinY, spo2MaxX, spo2MaxY), + HR: image.Rect(hrMinX, hrMinY, hrMaxX, hrMaxY), + } +} + +// VisualizeDetectedDisplays visualizes the detected SpO2 and HR display areas +func VisualizeDetectedDisplays(digitRegion gocv.Mat, displayAreas DisplayAreas, digitBoxes []image.Rectangle, centerX int, filename string) { + if !DEBUG { + return + } + + // Create visualization + viz := gocv.NewMat() + gocv.CvtColor(digitRegion, &viz, gocv.ColorGrayToBGR) + defer viz.Close() + + // Colors + red := color.RGBA{R: 255, G: 0, B: 0, A: 255} + yellow := color.RGBA{R: 255, G: 255, B: 0, A: 255} + cyan := color.RGBA{R: 0, G: 255, B: 255, A: 255} + green := color.RGBA{R: 0, G: 255, B: 0, A: 255} + + // Draw all individual digit boxes in 
green (thin lines) + for _, box := range digitBoxes { + gocv.Rectangle(&viz, box, green, 1) + } + + // Draw SpO2 area in red (thick) + gocv.Rectangle(&viz, displayAreas.SpO2, red, 3) + gocv.PutText(&viz, "SpO2", image.Pt(displayAreas.SpO2.Min.X+5, displayAreas.SpO2.Min.Y+20), + gocv.FontHersheyPlain, 1.5, red, 2) + + // Draw HR area in yellow (thick) + gocv.Rectangle(&viz, displayAreas.HR, yellow, 3) + gocv.PutText(&viz, "HR", image.Pt(displayAreas.HR.Min.X+5, displayAreas.HR.Min.Y+20), + gocv.FontHersheyPlain, 1.5, yellow, 2) + + // Draw center line from baseline + gocv.Line(&viz, + image.Pt(centerX, 0), + image.Pt(centerX, digitRegion.Rows()), + cyan, 2) + gocv.PutText(&viz, fmt.Sprintf("Center: %d", centerX), + image.Pt(centerX-40, 15), + gocv.FontHersheyPlain, 1.0, cyan, 1) + + gocv.IMWrite(filename, viz) + fmt.Printf("DEBUG: Saved detection visualization to %s\n", filename) +} + +// MergeDigitBoxesIntoDisplays merges individual digit boxes into two unified display areas +// This is the FINAL step of detection after finding digit boxes +// It splits at the center of the BOUNDING BOX (not image center) +// and merges all left boxes into SpO2, all right boxes into HR display area +func MergeDigitBoxesIntoDisplays(digitRegion gocv.Mat, digitBoxes []image.Rectangle) DisplayAreas { + // First, find the bounding box of ALL digit boxes + contentMinX := digitRegion.Cols() + contentMaxX := 0 + for _, box := range digitBoxes { + if box.Min.X < contentMinX { + contentMinX = box.Min.X + } + if box.Max.X > contentMaxX { + contentMaxX = box.Max.X + } + } + + // Calculate center of the CONTENT bounding box (not image center) + centerX := (contentMinX + contentMaxX) / 2 + + if DEBUG { + fmt.Printf("DEBUG: Content bounding box: X[%d-%d], Center X=%d\n", + contentMinX, contentMaxX, centerX) + } + + // Initialize bounding boxes + spo2MinX, spo2MaxX := digitRegion.Cols(), 0 + spo2MinY, spo2MaxY := digitRegion.Rows(), 0 + hrMinX, hrMaxX := digitRegion.Cols(), 0 + hrMinY, hrMaxY := 
digitRegion.Rows(), 0 + + // Track counts for logging + leftCount := 0 + rightCount := 0 + + // Split boxes by center X and merge + for _, box := range digitBoxes { + // Calculate box center X to determine which half it's in + boxCenterX := (box.Min.X + box.Max.X) / 2 + + if boxCenterX < centerX { + // LEFT HALF - SpO2 + leftCount++ + if box.Min.X < spo2MinX { + spo2MinX = box.Min.X + } + if box.Max.X > spo2MaxX { + spo2MaxX = box.Max.X + } + if box.Min.Y < spo2MinY { + spo2MinY = box.Min.Y + } + if box.Max.Y > spo2MaxY { + spo2MaxY = box.Max.Y + } + } else { + // RIGHT HALF - HR + rightCount++ + if box.Min.X < hrMinX { + hrMinX = box.Min.X + } + if box.Max.X > hrMaxX { + hrMaxX = box.Max.X + } + if box.Min.Y < hrMinY { + hrMinY = box.Min.Y + } + if box.Max.Y > hrMaxY { + hrMaxY = box.Max.Y + } + } + } + + if DEBUG { + fmt.Printf("DEBUG: Split at center X=%d: %d boxes left (SpO2), %d boxes right (HR)\n", + centerX, leftCount, rightCount) + fmt.Printf("DEBUG: SpO2 merged area: X[%d-%d] Y[%d-%d], Size: %dx%d\n", + spo2MinX, spo2MaxX, spo2MinY, spo2MaxY, + spo2MaxX-spo2MinX, spo2MaxY-spo2MinY) + fmt.Printf("DEBUG: HR merged area: X[%d-%d] Y[%d-%d], Size: %dx%d\n", + hrMinX, hrMaxX, hrMinY, hrMaxY, + hrMaxX-hrMinX, hrMaxY-hrMinY) + } + + return DisplayAreas{ + SpO2: image.Rect(spo2MinX, spo2MinY, spo2MaxX, spo2MaxY), + HR: image.Rect(hrMinX, hrMinY, hrMaxX, hrMaxY), + } +} + +// VisualizeMergedDisplays draws the merged display areas on the digit region +func VisualizeMergedDisplays(digitRegion gocv.Mat, displayAreas DisplayAreas, filename string) { + if !DEBUG { + return + } + + // Create visualization + viz := gocv.NewMat() + gocv.CvtColor(digitRegion, &viz, gocv.ColorGrayToBGR) + defer viz.Close() + + // Colors + red := color.RGBA{R: 255, G: 0, B: 0, A: 255} + yellow := color.RGBA{R: 255, G: 255, B: 0, A: 255} + cyan := color.RGBA{R: 0, G: 255, B: 255, A: 255} + + // Draw SpO2 area in red + gocv.Rectangle(&viz, displayAreas.SpO2, red, 3) + gocv.PutText(&viz, "SpO2", 
image.Pt(displayAreas.SpO2.Min.X+5, displayAreas.SpO2.Min.Y+20), + gocv.FontHersheyPlain, 1.5, red, 2) + + // Draw HR area in yellow + gocv.Rectangle(&viz, displayAreas.HR, yellow, 3) + gocv.PutText(&viz, "HR", image.Pt(displayAreas.HR.Min.X+5, displayAreas.HR.Min.Y+20), + gocv.FontHersheyPlain, 1.5, yellow, 2) + + // Draw center line at midpoint between the two display areas + centerX := (displayAreas.SpO2.Max.X + displayAreas.HR.Min.X) / 2 + gocv.Line(&viz, + image.Pt(centerX, 0), + image.Pt(centerX, digitRegion.Rows()), + cyan, 1) + + gocv.IMWrite(filename, viz) + fmt.Printf("DEBUG: Saved merged displays visualization to %s\n", filename) +} diff --git a/backups/backup_20251125/frame_source.go b/backups/backup_20251125/frame_source.go new file mode 100644 index 0000000..448020e --- /dev/null +++ b/backups/backup_20251125/frame_source.go @@ -0,0 +1,237 @@ +package main + +import ( + "fmt" + "image" + "sync" + "time" + + "gocv.io/x/gocv" +) + +// FrameSource provides frames to process +type FrameSource interface { + // Next returns the next frame and whether to continue processing + // Returns (frame, shouldContinue, error) + Next() (gocv.Mat, bool, error) + + // NextImmediate returns the very next frame (ignoring skip count) + // Used for immediate retry after low confidence/corruption + NextImmediate() (gocv.Mat, bool, error) + + // Close cleans up resources + Close() error + + // Name returns a description of the source + Name() string + + // IsActive returns false if source has been closed + IsActive() bool +} + +// RTSPSource reads from an RTSP stream +type RTSPSource struct { + url string + stream *gocv.VideoCapture + frameNum int + minFrameInterval time.Duration // Minimum time between processed frames + lastProcessedTime time.Time // Last time we processed a frame + lastAcquiredTime time.Time // Last time we acquired a frame (for logging) + closed bool + closeOnce sync.Once +} + +// NewRTSPSource creates a new RTSP frame source +// Processes frames at 4 fps 
(250ms minimum between frames) +func NewRTSPSource(url string) (*RTSPSource, error) { + stream, err := gocv.VideoCaptureFile(url) + if err != nil { + return nil, fmt.Errorf("failed to connect to RTSP stream: %w", err) + } + + logMessage(Console, Info, "šŸ“Š Processing frames at 4 fps (250ms minimum interval)") + + return &RTSPSource{ + url: url, + stream: stream, + minFrameInterval: 250 * time.Millisecond, + lastProcessedTime: time.Time{}, // Zero time = process first frame immediately + }, nil +} + +// readFrame reads and validates the next frame from the stream +func (s *RTSPSource) readFrame() (gocv.Mat, bool, error) { + // Check if source was closed + if s.closed { + return gocv.NewMat(), false, nil + } + + // Check if stream is valid + if s.stream == nil { + return gocv.NewMat(), false, fmt.Errorf("stream is null") + } + + frame := gocv.NewMat() + if ok := s.stream.Read(&frame); !ok { + frame.Close() + + // Check again before reconnecting + if s.closed { + return gocv.NewMat(), false, nil + } + + // Try to reconnect + s.stream.Close() + time.Sleep(5 * time.Second) + + // Check again after sleep + if s.closed { + return gocv.NewMat(), false, nil + } + + newStream, err := gocv.VideoCaptureFile(s.url) + if err != nil { + s.stream = nil // Set to nil to prevent further read attempts + s.closed = true // Mark as closed + return gocv.NewMat(), false, fmt.Errorf("reconnect failed: %w", err) + } + s.stream = newStream + return gocv.NewMat(), true, nil // Signal to retry + } + + s.frameNum++ + + // Check frame validity + if frame.Empty() || frame.Cols() < 640 || frame.Rows() < 480 { + frame.Close() + return gocv.NewMat(), true, nil // Signal to retry + } + + return frame, true, nil +} + +func (s *RTSPSource) Next() (gocv.Mat, bool, error) { + for { + frame, shouldContinue, err := s.readFrame() + if err != nil || !shouldContinue { + return frame, shouldContinue, err + } + + if frame.Empty() { + // readFrame returned empty (reconnecting or invalid), retry + continue + } 
+ + // Check if enough time has passed since last processed frame was ACQUIRED + now := time.Now() + if !s.lastProcessedTime.IsZero() { + elapsed := now.Sub(s.lastProcessedTime) + if elapsed < s.minFrameInterval { + // Not enough time passed, skip this frame + frame.Close() + continue + } + } + + // Enough time passed, mark acquisition time NOW (before processing) + s.lastProcessedTime = now + + // Log frame acquisition interval + if !s.lastAcquiredTime.IsZero() { + interval := now.Sub(s.lastAcquiredTime).Milliseconds() + logMessage(LogFile, Debug, " [TIMING] Frame acquired: +%dms since last (target: 250ms)", interval) + } + s.lastAcquiredTime = now + + return frame, true, nil + } +} + +func (s *RTSPSource) NextImmediate() (gocv.Mat, bool, error) { + // Get the very next frame (no skip count) + for { + frame, shouldContinue, err := s.readFrame() + if err != nil || !shouldContinue { + return frame, shouldContinue, err + } + + if frame.Empty() { + // readFrame returned empty (reconnecting or invalid), retry + continue + } + + return frame, true, nil + } +} + +func (s *RTSPSource) Close() error { + s.closeOnce.Do(func() { + s.closed = true + // DON'T close the stream here - it causes segfault if read is in progress + // The stream will be cleaned up when main() exits + }) + return nil +} + +func (s *RTSPSource) IsActive() bool { + return !s.closed +} + +func (s *RTSPSource) Name() string { + return fmt.Sprintf("RTSP stream: %s", s.url) +} + +// FileSource reads a single frame from a file +type FileSource struct { + path string + done bool +} + +// NewFileSource creates a new file frame source +func NewFileSource(path string) *FileSource { + return &FileSource{ + path: path, + done: false, + } +} + +func (s *FileSource) Next() (gocv.Mat, bool, error) { + if s.done { + return gocv.NewMat(), false, nil + } + + frame := gocv.IMRead(s.path, gocv.IMReadColor) + if frame.Empty() { + return gocv.NewMat(), false, fmt.Errorf("failed to load frame: %s", s.path) + } + + s.done = 
true + return frame, false, nil // Return frame, but signal to stop after this +} + +func (s *FileSource) NextImmediate() (gocv.Mat, bool, error) { + // For file source, NextImmediate is same as Next + return s.Next() +} + +func (s *FileSource) Close() error { + return nil +} + +func (s *FileSource) IsActive() bool { + return !s.done +} + +func (s *FileSource) Name() string { + return fmt.Sprintf("File: %s", s.path) +} + +// preprocessFrame crops timestamp and rotates +func preprocessFrame(frame gocv.Mat) gocv.Mat { + // Crop timestamp (top 68 pixels) + noTs := frame.Region(image.Rect(0, 68, frame.Cols(), frame.Rows())) + rotated := gocv.NewMat() + gocv.Rotate(noTs, &rotated, gocv.Rotate90Clockwise) + noTs.Close() + return rotated +} diff --git a/backups/backup_20251125/go.mod b/backups/backup_20251125/go.mod new file mode 100644 index 0000000..0c8e373 --- /dev/null +++ b/backups/backup_20251125/go.mod @@ -0,0 +1,9 @@ +module pulse-monitor + +go 1.24.4 + +require ( + github.com/otiai10/gosseract/v2 v2.4.1 // indirect + gocv.io/x/gocv v0.42.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/backups/backup_20251125/go.sum b/backups/backup_20251125/go.sum new file mode 100644 index 0000000..3a01201 --- /dev/null +++ b/backups/backup_20251125/go.sum @@ -0,0 +1,7 @@ +github.com/otiai10/gosseract/v2 v2.4.1 h1:G8AyBpXEeSlcq8TI85LH/pM5SXk8Djy2GEXisgyblRw= +github.com/otiai10/gosseract/v2 v2.4.1/go.mod h1:1gNWP4Hgr2o7yqWfs6r5bZxAatjOIdqWxJLWsTsembk= +gocv.io/x/gocv v0.42.0 h1:AAsrFJH2aIsQHukkCovWqj0MCGZleQpVyf5gNVRXjQI= +gocv.io/x/gocv v0.42.0/go.mod h1:zYdWMj29WAEznM3Y8NsU3A0TRq/wR/cy75jeUypThqU= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/backups/backup_20251125/helpers.go b/backups/backup_20251125/helpers.go new file mode 
100644 index 0000000..4a0920e --- /dev/null +++ b/backups/backup_20251125/helpers.go @@ -0,0 +1,117 @@ +package main + +import ( + "fmt" + "io" + "time" +) + +// LogTarget specifies where to write the log message +type LogTarget int +const ( + Console LogTarget = iota + LogFile + Both +) + +// LogLevel specifies the severity/type of log message +type LogLevel int +const ( + Debug LogLevel = iota + Info + Warning + Error +) + +// Global logger - set once during initialization +var globalLogger io.Writer = nil + +// logMessage writes a formatted log message to the specified target(s) +// Debug messages are automatically filtered out when DEBUG_MODE is false +// +// Usage examples: +// logMessage(Console, Info, "SpO2=%d%%, HR=%d bpm", spo2, hr) +// logMessage(Both, Debug, "Before preprocess: %dx%d", cols, rows) +// logMessage(Both, Error, "Layout detection failed: %v", err) +// logMessage(Both, Warning, "Low confidence (#%d)", count) +func logMessage(target LogTarget, level LogLevel, format string, args ...interface{}) { + // Filter debug messages when not in debug mode + if level == Debug && !DEBUG_MODE { + return + } + + // Format the message + message := fmt.Sprintf(format, args...) 
+ + // Single-letter prefix for fixed-width alignment + var levelChar string + switch level { + case Debug: + levelChar = "D" + case Info: + levelChar = "I" + case Warning: + levelChar = "W" + case Error: + levelChar = "E" + } + + // Format with timestamp + timestamp := time.Now().Format("15:04:05.000") + formattedMessage := fmt.Sprintf("[%s] %s %s\n", timestamp, levelChar, message) + + // Write to target(s) + switch target { + case Console: + fmt.Print(formattedMessage) + case LogFile: + if globalLogger != nil { + fmt.Fprint(globalLogger, formattedMessage) + } + case Both: + fmt.Print(formattedMessage) + if globalLogger != nil { + fmt.Fprint(globalLogger, formattedMessage) + } + } +} + +// logf is DEPRECATED - use logMessage() instead +// Kept temporarily for reference, but no longer used in the codebase +/* +func logf(logger io.Writer, format string, args ...interface{}) { + if logger != nil { + fmt.Fprintf(logger, format, args...) + } +} +*/ + +// printTimingTable prints timing data in horizontal table format +// Prints header every 20 frames for readability +// Note: Acquire is separate (camera waiting time) +// Note: Total is wall-clock processing time (may not equal sum due to logging/overhead) +// Note: HASS is added in main loop after function returns +func printTimingTable(timing TimingData, showHeader bool) { + if !TIMING_MODE { + return + } + + if showHeader { + fmt.Println("") + fmt.Println("Frame | Acquire | Thresh | Prep | Scale | OCR_SpO2 | OCR_HR | Valid | FileIO | HASS | Total") + fmt.Println("------|---------|--------|------|-------|----------|--------|-------|--------|------|-------") + } + + fmt.Printf("#%-4d | %5dms | %5dms | %3dms | %4dms | %7dms | %5dms | %4dms | %5dms | %3dms | %4dms\n", + timing.FrameNum, + timing.Acquire, + timing.Threshold, + timing.Preprocess, + timing.Scale, + timing.OCR_SpO2, + timing.OCR_HR, + timing.Validation, + timing.FileIO, + timing.HASS, + timing.Total) +} diff --git a/backups/backup_20251125/homeassistant.go 
b/backups/backup_20251125/homeassistant.go new file mode 100644 index 0000000..361fb29 --- /dev/null +++ b/backups/backup_20251125/homeassistant.go @@ -0,0 +1,43 @@ +package main + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "net/http" + "time" +) + +func postToHomeAssistant(config *Config, entityID string, value int, unit string, friendlyName string) error { + url := fmt.Sprintf("%s/api/states/%s", config.HomeAssistant.URL, entityID) + payload := map[string]interface{}{ + "state": fmt.Sprintf("%d", value), + "attributes": map[string]interface{}{ + "unit_of_measurement": unit, + "friendly_name": friendlyName, + "device_class": "measurement", + }, + } + jsonData, err := json.Marshal(payload) + if err != nil { + return fmt.Errorf("marshal error: %w", err) + } + req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData)) + if err != nil { + return fmt.Errorf("request error: %w", err) + } + req.Header.Set("Authorization", "Bearer "+config.HomeAssistant.Token) + req.Header.Set("Content-Type", "application/json") + client := &http.Client{Timeout: 5 * time.Second} + resp, err := client.Do(req) + if err != nil { + return fmt.Errorf("HTTP error: %w", err) + } + defer resp.Body.Close() + if resp.StatusCode != 200 && resp.StatusCode != 201 { + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(body)) + } + return nil +} diff --git a/backups/backup_20251125/html_report.go b/backups/backup_20251125/html_report.go new file mode 100644 index 0000000..bdb0055 --- /dev/null +++ b/backups/backup_20251125/html_report.go @@ -0,0 +1,281 @@ +package main + +import ( + "fmt" + "os" +) + +type ReviewEntry struct { + FrameNum int + Timestamp string + SpO2Value int + SpO2LeftDigit int + SpO2LeftConf float64 + SpO2RightDigit int + SpO2RightConf float64 + HRValue int + HRLeftDigit int + HRLeftConf float64 + HRRightDigit int + HRRightConf float64 + Failed bool + FailureReason string + Unstable bool + UnstableReason string +} + +// 
initReviewHTML creates the HTML file with header and opens the table +func initReviewHTML() error { + html := ` + + + + Pulse-Ox Recognition Review + + + + + + +

Pulse-Ox Recognition Review (Live)

+

Tip: Press Ctrl+F (or Cmd+F on Mac) and search for "CHECK" to find frames with low confidence (<85%)

+

Refresh page to see latest frames...

+ + + + + + + + +` + return os.WriteFile("review/review.html", []byte(html), 0644) +} + +// appendReviewEntry appends a single entry to the HTML file +func appendReviewEntry(e ReviewEntry) error { + confClass := func(conf float64) string { + if conf >= 85 { + return "conf-high" + } else if conf >= 70 { + return "conf-med" + } + return "conf-low" + } + + needsReview := e.SpO2LeftConf < 85 || e.SpO2RightConf < 85 || e.HRLeftConf < 85 || e.HRRightConf < 85 + reviewMarker := "" + if needsReview { + reviewMarker = " āš ļøCHECK" + } + + var rowHTML string + + if e.Failed { + // Failed recognition entry + rowHTML = fmt.Sprintf(` + + + + + + +`, e.FrameNum, reviewMarker, e.Timestamp, e.FrameNum, e.FrameNum, e.FailureReason, + e.SpO2Value, + e.FrameNum, e.FrameNum, + e.FrameNum, e.FrameNum, + e.FrameNum, e.FrameNum, e.SpO2LeftDigit, confClass(e.SpO2LeftConf), e.SpO2LeftConf, + e.FrameNum, e.FrameNum, e.SpO2RightDigit, confClass(e.SpO2RightConf), e.SpO2RightConf, + e.HRValue, + e.FrameNum, e.FrameNum, + e.FrameNum, e.FrameNum, + e.FrameNum, e.FrameNum, e.HRLeftDigit, confClass(e.HRLeftConf), e.HRLeftConf, + e.FrameNum, e.FrameNum, e.HRRightDigit, confClass(e.HRRightConf), e.HRRightConf) + } else { + // Successful recognition entry + unstableMarker := "" + if e.Unstable { + unstableMarker = fmt.Sprintf("
āš ļø %s", e.UnstableReason) + } + rowHTML = fmt.Sprintf(` + + + + + + +`, e.FrameNum, reviewMarker, unstableMarker, e.Timestamp, + e.FrameNum, e.FrameNum, + e.SpO2Value, + e.FrameNum, e.FrameNum, + e.FrameNum, e.FrameNum, + e.FrameNum, e.FrameNum, e.SpO2LeftDigit, confClass(e.SpO2LeftConf), e.SpO2LeftConf, + e.FrameNum, e.FrameNum, e.SpO2RightDigit, confClass(e.SpO2RightConf), e.SpO2RightConf, + e.HRValue, + e.FrameNum, e.FrameNum, + e.FrameNum, e.FrameNum, + e.FrameNum, e.FrameNum, e.HRLeftDigit, confClass(e.HRLeftConf), e.HRLeftConf, + e.FrameNum, e.FrameNum, e.HRRightDigit, confClass(e.HRRightConf), e.HRRightConf) + } + + // Open file in append mode + f, err := os.OpenFile("review/review.html", os.O_APPEND|os.O_WRONLY, 0644) + if err != nil { + return err + } + defer f.Close() + + _, err = f.WriteString(rowHTML) + return err +} + +// closeReviewHTML writes the closing tags +func closeReviewHTML() error { + footer := `
FrameTimeDetected BoxesSpO2 RecognitionHR Recognition
%d%s%s + + + āŒ FAILED: %s
+ SpO2: %d
+
+ +
+
+ + D1 +
+
+ + %d + (%.0f%%) +
+
+ + %d + (%.0f%%) +
+
+ HR: %d
+
+ +
+
+ + D1 +
+
+ + %d + (%.0f%%) +
+
+ + %d + (%.0f%%) +
+
%d%s%s%s + + + %d
+
+ +
+
+ + D1 +
+
+ + %d + (%.0f%%) +
+
+ + %d + (%.0f%%) +
+
+ %d
+
+ +
+
+ + D1 +
+
+ + %d + (%.0f%%) +
+
+ + %d + (%.0f%%) +
+
+

+ Color coding: + Green ≄85%, + Orange 70-84%, + Red <70% +

+

To add digits to training set, copy files like: cp review/f5_spo2_digit2.png training_digits/9_2.png

+ +` + + f, err := os.OpenFile("review/review.html", os.O_APPEND|os.O_WRONLY, 0644) + if err != nil { + return err + } + defer f.Close() + + _, err = f.WriteString(footer) + return err +} + +// writeReviewHTML - kept for backward compatibility (final shutdown) +func writeReviewHTML(entries []ReviewEntry) error { + // Just close the HTML properly - entries already appended + return closeReviewHTML() +} diff --git a/backups/backup_20251125/layout_detection.go b/backups/backup_20251125/layout_detection.go new file mode 100644 index 0000000..0384854 --- /dev/null +++ b/backups/backup_20251125/layout_detection.go @@ -0,0 +1,957 @@ +package main + +import ( + "fmt" + "image" + "image/color" + "math" + "sort" + "time" + + "gocv.io/x/gocv" +) + +type Display struct { + IsSplit bool // true = one box split in half, false = two separate boxes + FullRect image.Rectangle // used when IsSplit=true + LeftRect image.Rectangle // used when IsSplit=false + RightRect image.Rectangle // used when IsSplit=false +} + +type ScreenLayout struct { + SpO2Area image.Rectangle + HRArea image.Rectangle +} + +// rotateImage rotates an image by the specified angle (in degrees) +// Positive angle = counter-clockwise, negative = clockwise +func rotateImage(src gocv.Mat, angleDegrees float64) gocv.Mat { + logMessage(Both, Info, " [rotateImage] Input: %dx%d, Angle: %.3f°", src.Cols(), src.Rows(), angleDegrees) + + // Get image center + center := image.Pt(src.Cols()/2, src.Rows()/2) + logMessage(Both, Info, " [rotateImage] Center: (%d,%d)", center.X, center.Y) + + // Get rotation matrix (angle in degrees, scale=1.0) + rotMat := gocv.GetRotationMatrix2D(center, angleDegrees, 1.0) + defer rotMat.Close() + + logMessage(Both, Info, " [rotateImage] Rotation matrix created") + + // Apply rotation + rotated := gocv.NewMat() + gocv.WarpAffineWithParams(src, &rotated, rotMat, image.Pt(src.Cols(), src.Rows()), + gocv.InterpolationLinear, gocv.BorderConstant, color.RGBA{0, 0, 0, 0}) + + logMessage(Both, Info, 
" [rotateImage] Output: %dx%d, Empty: %v", rotated.Cols(), rotated.Rows(), rotated.Empty()) + + return rotated +} + +// calculateRotationAngle determines the rotation angle needed to straighten the frame +// by analyzing the bottom horizontal boxes (status indicators below 80% Y) +func calculateRotationAngle(preprocessed gocv.Mat) float64 { + const ROTATION_THRESHOLD = 0.5 // Only rotate if angle > 0.5 degrees + + // Threshold to find contours + gray := gocv.NewMat() + gocv.CvtColor(preprocessed, &gray, gocv.ColorBGRToGray) + defer gray.Close() + + thresh := gocv.NewMat() + gocv.Threshold(gray, &thresh, 240, 255, gocv.ThresholdBinary) + defer thresh.Close() + + // Find all contours + contours := gocv.FindContours(thresh, gocv.RetrievalList, gocv.ChainApproxSimple) + defer contours.Close() + + if contours.Size() == 0 { + logMessage(Both, Warning, " [ROTATION] No contours found - no rotation") + return 0.0 + } + + logMessage(Both, Info, " [ROTATION] Found %d contours", contours.Size()) + + // FIRST: Calculate bounding box from ALL contours (no filtering) + // This gives us the true extent of the display + if contours.Size() == 0 { + logMessage(Both, Warning, " [ROTATION] No contours - cannot calculate bounding box") + return 0.0 + } + + firstRect := gocv.BoundingRect(contours.At(0)) + minX := firstRect.Min.X + minY := firstRect.Min.Y + maxX := firstRect.Max.X + maxY := firstRect.Max.Y + + for i := 1; i < contours.Size(); i++ { + rect := gocv.BoundingRect(contours.At(i)) + if rect.Min.X < minX { minX = rect.Min.X } + if rect.Min.Y < minY { minY = rect.Min.Y } + if rect.Max.X > maxX { maxX = rect.Max.X } + if rect.Max.Y > maxY { maxY = rect.Max.Y } + } + + boundingBoxHeight := maxY - minY + logMessage(Both, Info, " [ROTATION] Bounding box from ALL contours: Y=%d-%d (H=%d)", minY, maxY, boundingBoxHeight) + + // THEN: Get bounding boxes for filtered contours (for rotation calculation) + var boxes []image.Rectangle + for i := 0; i < contours.Size(); i++ { + rect := 
gocv.BoundingRect(contours.At(i)) + // Filter out tiny boxes + if rect.Dx() > 20 && rect.Dy() > 10 { + boxes = append(boxes, rect) + } + } + + logMessage(Both, Info, " [ROTATION] %d boxes after size filter (W>20, H>10)", len(boxes)) + + if len(boxes) < 3 { + logMessage(Both, Warning, " [ROTATION] Only %d boxes found - need at least 3", len(boxes)) + return 0.0 + } + + // Calculate 80% Y threshold relative to bounding box (bottom 20% of bounding box) + threshold80 := minY + int(float64(boundingBoxHeight)*0.8) + logMessage(Both, Info, " [ROTATION] 80%%%% threshold: Y>%d", threshold80) + + // Filter to boxes in bottom 20% (Y > 80% of height) + var bottomBoxes []image.Rectangle + for _, box := range boxes { + // Use box center Y for filtering + centerY := box.Min.Y + box.Dy()/2 + if centerY > threshold80 { + bottomBoxes = append(bottomBoxes, box) + } + } + + logMessage(Both, Info, " [ROTATION] %d boxes in bottom 20%%%% (Y>%d)", len(bottomBoxes), threshold80) + + if len(bottomBoxes) < 3 { + logMessage(Both, Warning, " [ROTATION] Only %d boxes in bottom 20%%%% - need at least 3", len(bottomBoxes)) + return 0.0 + } + + // Sort by Y coordinate (descending = bottom to top) to get lowest boxes + sort.Slice(bottomBoxes, func(i, j int) bool { + return bottomBoxes[i].Min.Y > bottomBoxes[j].Min.Y + }) + + // Take bottom 3 boxes (highest Y coordinates) + topBoxes := bottomBoxes[:3] + logMessage(Both, Info, " [ROTATION] Using 3 lowest boxes:") + for i, box := range topBoxes { + centerY := box.Min.Y + box.Dy()/2 + logMessage(Both, Info, " Box %d: X=%d-%d, Y=%d-%d (centerY=%d)", + i, box.Min.X, box.Max.X, box.Min.Y, box.Max.Y, centerY) + } + + // DEBUG: Save visualization of the three boxes used for rotation + if DEBUG_MODE { + rotDebugVis := preprocessed.Clone() // Clone the colored frame + + // Create bounding box rectangle from calculated values + boundingBox := image.Rect(minX, minY, maxX, maxY) + + // Draw bounding box in blue + gocv.Rectangle(&rotDebugVis, boundingBox, 
color.RGBA{0, 100, 255, 255}, 3) + + // Draw 80% threshold line in red (this is what we use to filter boxes) + gocv.Line(&rotDebugVis, + image.Pt(0, threshold80), + image.Pt(preprocessed.Cols(), threshold80), + color.RGBA{255, 0, 0, 255}, 3) // Red line at threshold + + // Draw the three selected boxes in green + for _, box := range topBoxes { + gocv.Rectangle(&rotDebugVis, box, color.RGBA{0, 255, 0, 255}, 3) + } + + // Add labels + gocv.PutText(&rotDebugVis, + fmt.Sprintf("80%%%% threshold: Y=%d", threshold80), + image.Pt(10, threshold80-10), + gocv.FontHersheyPlain, 1.5, color.RGBA{255, 0, 0, 255}, 2) + gocv.PutText(&rotDebugVis, + fmt.Sprintf("BBox: %dx%d (Y=%d-%d)", boundingBox.Dx(), boundingBox.Dy(), minY, maxY), + image.Pt(minX+10, minY+30), + gocv.FontHersheyPlain, 1.5, color.RGBA{0, 100, 255, 255}, 2) + + gocv.IMWrite("test_output/rotation_boxes_BEFORE.png", rotDebugVis) + rotDebugVis.Close() + logMessage(Both, Info, " [ROTATION] Saved rotation_boxes_BEFORE.png") + } + + // Find leftmost and rightmost boxes + var leftBox, rightBox image.Rectangle + leftBox = topBoxes[0] + rightBox = topBoxes[0] + + for _, box := range topBoxes[1:] { + if box.Min.X < leftBox.Min.X { + leftBox = box + } + if box.Min.X > rightBox.Min.X { + rightBox = box + } + } + + // Get center Y of each box + leftY := leftBox.Min.Y + leftBox.Dy()/2 + rightY := rightBox.Min.Y + rightBox.Dy()/2 + leftX := leftBox.Min.X + leftBox.Dx()/2 + rightX := rightBox.Min.X + rightBox.Dx()/2 + + logMessage(Both, Info, " [ROTATION] Left box center: (%d,%d), Right box center: (%d,%d)", leftX, leftY, rightX, rightY) + + // Calculate rotation angle + if rightX == leftX { + logMessage(Both, Info, " [ROTATION] Boxes vertically aligned - no rotation") + return 0.0 + } + + slope := float64(rightY-leftY) / float64(rightX-leftX) + angleDeg := math.Atan(slope) * 180.0 / math.Pi + + logMessage(Both, Info, " [ROTATION] Slope=%.4f, Angle=%.3f°", slope, angleDeg) + + // Apply threshold + if angleDeg > 
-ROTATION_THRESHOLD && angleDeg < ROTATION_THRESHOLD { + logMessage(Both, Info, " [ROTATION] Angle %.3f° below threshold %.1f° - no rotation", angleDeg, ROTATION_THRESHOLD) + return 0.0 + } + + logMessage(Both, Info, " [ROTATION] Will rotate by %.3f°", angleDeg) + return angleDeg +} + +func detectScreenLayoutAreas(rotated gocv.Mat) (*ScreenLayout, float64, error) { + // Correct order: Find center region FIRST (using all contours), THEN find digit displays + + startTime := time.Now() + + // Input is already thresholded binary image - work directly on it + // No grayscale, no threshold - just find contours + + // DEBUG: Save input to see what we're working with + if DEBUG_MODE { + gocv.IMWrite("test_output/layout_step1_input.png", rotated) + logMessage(LogFile, Debug, " [DEBUG] Saved layout_step1_input.png") + } + + // STEP 3: Apply erosion to separate nearby elements (5x5 kernel) - COMMENTED OUT + // step3Start := time.Now() + // kernel := gocv.GetStructuringElement(gocv.MorphRect, image.Pt(5, 5)) + // eroded := gocv.NewMat() + // gocv.Erode(thresh, &eroded, kernel) + // kernel.Close() + // defer eroded.Close() + // fmt.Printf(" [TIMING] Step 3 (Erosion): %dms\n", time.Since(step3Start).Milliseconds()) + + // STEP 2: Find contours - use RetrievalList to get ALL contours including nested + step2Start := time.Now() + contours := gocv.FindContours(rotated, gocv.RetrievalList, gocv.ChainApproxSimple) + defer contours.Close() + + logMessage(Both, Info, " Found %d total contours", contours.Size()) + if contours.Size() == 0 { + logMessage(Both, Error, " ERROR: No contours found in thresholded frame") + logMessage(Both, Error, " Frame dimensions: %dx%d", rotated.Cols(), rotated.Rows()) + // Save debug image + gocv.IMWrite("test_output/error_input.png", rotated) + logMessage(Both, Error, " Saved debug image to test_output/error_input.png") + return nil, 0, nil + } + + logMessage(LogFile, Debug, " [TIMING] Step 2 (Find contours): %dms", time.Since(step2Start).Milliseconds()) 
+ logMessage(LogFile, Debug, " [TIMING] Step 2 (Total): %dms", time.Since(startTime).Milliseconds()) + + // STEP 1: Collect ALL boxes (no filter) and print details + var allBoxes []image.Rectangle + for i := 0; i < contours.Size(); i++ { + rect := gocv.BoundingRect(contours.At(i)) + allBoxes = append(allBoxes, rect) + // fmt.Printf(" Box %d: X=%d-%d (W=%d), Y=%d-%d (H=%d)\n", + // i, rect.Min.X, rect.Max.X, rect.Dx(), + // rect.Min.Y, rect.Max.Y, rect.Dy()) + } + + if len(allBoxes) == 0 { + logMessage(Both, Error, " ERROR: No bounding boxes found from contours") + return nil, 0, nil + } + + logMessage(LogFile, Debug, " Found %d boxes (all, no filter)", len(allBoxes)) + logMessage(LogFile, Debug, "") + + // DEBUG: Skip step 6a and 6b visualizations + // rawContoursVis := rotated.Clone() + // ... + // allBoxesVis := rotated.Clone() + // ... + + // STEP 2: Filter to significant boxes (width or height > 30px) for processing + var significantBoxes []image.Rectangle + for _, box := range allBoxes { + if box.Dx() > 30 || box.Dy() > 30 { + significantBoxes = append(significantBoxes, box) + } + } + + if len(significantBoxes) == 0 { + logMessage(Both, Error, " ERROR: No significant boxes found (>30px)") + logMessage(Both, Error, " All %d boxes were too small", len(allBoxes)) + return nil, 0, nil + } + + logMessage(LogFile, Debug, " Found %d significant boxes (>30px) for processing", len(significantBoxes)) + + // STEP 3: Calculate bounding box from ALL boxes (not just significant ones) + // This ensures bottom small numbers are included in the height calculation + minX := allBoxes[0].Min.X + minY := allBoxes[0].Min.Y + maxX := allBoxes[0].Max.X + maxY := allBoxes[0].Max.Y + + for _, box := range allBoxes { + if box.Min.X < minX { + minX = box.Min.X + } + if box.Min.Y < minY { + minY = box.Min.Y + } + if box.Max.X > maxX { + maxX = box.Max.X + } + if box.Max.Y > maxY { + maxY = box.Max.Y + } + } + + boundingBox := image.Rect(minX, minY, maxX, maxY) + boundingBoxWidth := 
boundingBox.Dx() + logMessage(LogFile, Debug, " Bounding box from all boxes: X=%d-%d, Y=%d-%d, Width=%d", minX, maxX, minY, maxY, boundingBoxWidth) + + // Calculate required scale to normalize to 860px width + const TARGET_WIDTH = 860 + scale := float64(TARGET_WIDTH) / float64(boundingBoxWidth) + logMessage(Both, Info, " šŸ“Š Bounding box width: %dpx, Scale factor: %.3f (target: %dpx)", boundingBoxWidth, scale, TARGET_WIDTH) + + // STEP 4: Scale the frame to 860px width + step4Start := time.Now() + newWidth := int(float64(rotated.Cols()) * scale) + newHeight := int(float64(rotated.Rows()) * scale) + scaled := gocv.NewMat() + gocv.Resize(rotated, &scaled, image.Pt(newWidth, newHeight), 0, 0, gocv.InterpolationLinear) + defer scaled.Close() + logMessage(LogFile, Debug, " [TIMING] Step 4 (Scale frame): %dms", time.Since(step4Start).Milliseconds()) + logMessage(LogFile, Debug, " Scaled frame: %dx%d -> %dx%d", rotated.Cols(), rotated.Rows(), newWidth, newHeight) + + // CRITICAL: Scale the bounding box coordinates to match the scaled frame + scaledMinX := int(float64(minX) * scale) + scaledMaxX := int(float64(maxX) * scale) + scaledMinY := int(float64(minY) * scale) + scaledMaxY := int(float64(maxY) * scale) + scaledBoundingBox := image.Rect(scaledMinX, scaledMinY, scaledMaxX, scaledMaxY) + logMessage(LogFile, Debug, " Scaled bounding box: X=%d-%d, Y=%d-%d (from original: X=%d-%d, Y=%d-%d)", + scaledMinX, scaledMaxX, scaledMinY, scaledMaxY, minX, maxX, minY, maxY) + + // Scale all significant boxes coordinates too + var scaledSignificantBoxes []image.Rectangle + for _, box := range significantBoxes { + scaledBox := image.Rect( + int(float64(box.Min.X)*scale), + int(float64(box.Min.Y)*scale), + int(float64(box.Max.X)*scale), + int(float64(box.Max.Y)*scale), + ) + scaledSignificantBoxes = append(scaledSignificantBoxes, scaledBox) + } + + // Scale all boxes too (for visualization) + var scaledAllBoxes []image.Rectangle + for _, box := range allBoxes { + scaledBox := 
image.Rect( + int(float64(box.Min.X)*scale), + int(float64(box.Min.Y)*scale), + int(float64(box.Max.X)*scale), + int(float64(box.Max.Y)*scale), + ) + scaledAllBoxes = append(scaledAllBoxes, scaledBox) + } + + // All subsequent processing now works on the SCALED frame with SCALED coordinates + // This ensures boxes are calculated in scaled coordinates + + // STEP 5: Find 50% line + height := scaledBoundingBox.Dy() + line50 := scaledBoundingBox.Min.Y + height/2 + logMessage(LogFile, Debug, " 50%% line at Y=%d", line50) + + // DEBUG: Save step 5 visualization - bounding box and 50% line + bboxVis := gocv.NewMat() + gocv.CvtColor(scaled, &bboxVis, gocv.ColorGrayToBGR) + // Draw bounding box in blue + gocv.Rectangle(&bboxVis, scaledBoundingBox, color.RGBA{0, 100, 255, 255}, 3) + // Draw 50% line in red + gocv.Line(&bboxVis, image.Pt(scaledMinX, line50), image.Pt(scaledMaxX, line50), color.RGBA{255, 0, 0, 255}, 3) + // Add labels + gocv.PutText(&bboxVis, fmt.Sprintf("BBox: %d-%d, %d-%d", scaledMinX, scaledMaxX, scaledMinY, scaledMaxY), + image.Pt(scaledMinX+10, scaledMinY+30), + gocv.FontHersheyPlain, 1.2, color.RGBA{0, 100, 255, 255}, 2) + gocv.PutText(&bboxVis, fmt.Sprintf("50%% line: Y=%d", line50), + image.Pt(scaledMinX+10, line50-10), + gocv.FontHersheyPlain, 1.2, color.RGBA{255, 0, 0, 255}, 2) + gocv.IMWrite("test_output/layout_step5_bbox_and_line.png", bboxVis) + bboxVis.Close() + logMessage(LogFile, Debug, " [DEBUG] Saved layout_step5_bbox_and_line.png") + + // DEBUG: Save step 6 - all boxes with numbers + allBoxesVis := gocv.NewMat() + gocv.CvtColor(scaled, &allBoxesVis, gocv.ColorGrayToBGR) + for i, box := range scaledAllBoxes { + gocv.Rectangle(&allBoxesVis, box, color.RGBA{0, 255, 0, 255}, 2) + // Add box number + gocv.PutText(&allBoxesVis, fmt.Sprintf("%d", i), + image.Pt(box.Min.X+5, box.Min.Y+15), + gocv.FontHersheyPlain, 1.2, color.RGBA{0, 255, 0, 255}, 2) + } + gocv.IMWrite("test_output/layout_step6_all_boxes.png", allBoxesVis) + allBoxesVis.Close() + 
logMessage(LogFile, Debug, " [DEBUG] Saved layout_step6_all_boxes.png") + + // STEP 6: Filter to boxes crossing 50% line (center region) + var centerBoxes []image.Rectangle + for _, box := range scaledSignificantBoxes { + if box.Min.Y < line50 && box.Max.Y > line50 { + centerBoxes = append(centerBoxes, box) + } + } + + if len(centerBoxes) == 0 { + logMessage(Both, Error, " ERROR: No boxes crossing 50%%%% line") + logMessage(Both, Error, " 50%%%% line at Y=%d, checked %d significant boxes", line50, len(scaledSignificantBoxes)) + return nil, 0, nil + } + + logMessage(LogFile, Debug, " Found %d boxes crossing 50%% line (center region)", len(centerBoxes)) + + // DEBUG: Save step 7 - center boxes visualization + centerBoxesVis := gocv.NewMat() + gocv.CvtColor(scaled, ¢erBoxesVis, gocv.ColorGrayToBGR) + // Draw 50% line + gocv.Line(¢erBoxesVis, image.Pt(scaledMinX, line50), image.Pt(scaledMaxX, line50), color.RGBA{255, 0, 0, 255}, 3) + // Draw center boxes + for _, box := range centerBoxes { + gocv.Rectangle(¢erBoxesVis, box, color.RGBA{0, 255, 255, 255}, 2) + } + gocv.IMWrite("test_output/layout_step7_center_boxes.png", centerBoxesVis) + centerBoxesVis.Close() + logMessage(LogFile, Debug, " [DEBUG] Saved layout_step7_center_boxes.png") + + // STEP 7: From center boxes, identify digit displays (height >= 110px) + var digitDisplays []image.Rectangle + for _, box := range centerBoxes { + logMessage(LogFile, Debug, " Center box: X=%d-%d (W=%d), Y=%d-%d (H=%d)", + box.Min.X, box.Max.X, box.Dx(), + box.Min.Y, box.Max.Y, box.Dy()) + if box.Dy() >= 110 { + digitDisplays = append(digitDisplays, box) + logMessage(LogFile, Debug, " āœ“ Added as digit display (H=%d >= 110)", box.Dy()) + } else { + logMessage(LogFile, Debug, " āœ— Skipped (H=%d < 110)", box.Dy()) + } + } + + if len(digitDisplays) == 0 { + logMessage(Both, Error, " ERROR: No digit displays found (height >= 110px) in center region") + logMessage(Both, Error, " Found %d center boxes, none met height requirement", 
len(centerBoxes)) + return nil, 0, nil + } + + logMessage(LogFile, Debug, " Found %d digit displays (height > 120px) BEFORE trimming", len(digitDisplays)) + + // DEBUG: Save step 8 - digit displays visualization + if DEBUG_MODE { + digitDisplaysVis := gocv.NewMat() + gocv.CvtColor(scaled, &digitDisplaysVis, gocv.ColorGrayToBGR) + for _, box := range digitDisplays { + gocv.Rectangle(&digitDisplaysVis, box, color.RGBA{255, 0, 255, 255}, 3) + } + gocv.IMWrite("test_output/layout_step8_digit_displays.png", digitDisplaysVis) + digitDisplaysVis.Close() + logMessage(LogFile, Debug, " [DEBUG] Saved layout_step8_digit_displays.png") + } + + // STEP 8: Get Y range from digit displays + minDigitY := digitDisplays[0].Min.Y + maxDigitY := digitDisplays[0].Max.Y + for _, box := range digitDisplays { + if box.Min.Y < minDigitY { + minDigitY = box.Min.Y + } + if box.Max.Y > maxDigitY { + maxDigitY = box.Max.Y + } + } + logMessage(LogFile, Debug, " Digit display Y range: %d-%d", minDigitY, maxDigitY) + + // STEP 9: Find ALL boxes (from scaledAllBoxes) within this Y range + var boxesInRange []image.Rectangle + for _, box := range scaledAllBoxes { + // Box overlaps with Y range if its bottom is below minDigitY and top is above maxDigitY + if box.Max.Y > minDigitY && box.Min.Y < maxDigitY { + boxesInRange = append(boxesInRange, box) + } + } + logMessage(LogFile, Debug, " Found %d boxes in Y range %d-%d", len(boxesInRange), minDigitY, maxDigitY) + + // DEBUG: Save step 9 - boxes in range visualization + boxesInRangeVis := gocv.NewMat() + gocv.CvtColor(scaled, &boxesInRangeVis, gocv.ColorGrayToBGR) + // Draw Y range lines + gocv.Line(&boxesInRangeVis, image.Pt(0, minDigitY), image.Pt(boxesInRangeVis.Cols(), minDigitY), color.RGBA{255, 0, 0, 255}, 2) + gocv.Line(&boxesInRangeVis, image.Pt(0, maxDigitY), image.Pt(boxesInRangeVis.Cols(), maxDigitY), color.RGBA{255, 0, 0, 255}, 2) + // Draw digit displays in magenta + for _, box := range digitDisplays { + gocv.Rectangle(&boxesInRangeVis, 
box, color.RGBA{255, 0, 255, 255}, 3) + } + // Draw all boxes in range in cyan + for _, box := range boxesInRange { + gocv.Rectangle(&boxesInRangeVis, box, color.RGBA{0, 255, 255, 255}, 1) + } + gocv.IMWrite("test_output/layout_step9_boxes_in_range.png", boxesInRangeVis) + boxesInRangeVis.Close() + logMessage(LogFile, Debug, " [DEBUG] Saved layout_step9_boxes_in_range.png") + + // STEP 10: Trim digit displays based on small boxes crossing their right edge + for i := range digitDisplays { + originalMaxX := digitDisplays[i].Max.X + newMaxX := originalMaxX + + // Check each box in range + for _, smallBox := range boxesInRange { + // Skip if this is the digit display itself + if smallBox == digitDisplays[i] { + continue + } + + // Check if small box crosses the right edge of this digit display + // Crosses if: smallBox.Min.X < digitDisplay.Max.X AND smallBox.Max.X > digitDisplay.Max.X + if smallBox.Min.X < digitDisplays[i].Max.X && smallBox.Max.X > digitDisplays[i].Max.X { + // Use the left edge of the small box as the new right edge + if smallBox.Min.X < newMaxX { + newMaxX = smallBox.Min.X + logMessage(LogFile, Debug, " Trimming digit display %d: right edge %d -> %d (small box at X=%d-%d)", + i, originalMaxX, newMaxX, smallBox.Min.X, smallBox.Max.X) + } + } + } + + digitDisplays[i].Max.X = newMaxX + } + + logMessage(LogFile, Debug, " Digit displays AFTER trimming: %d", len(digitDisplays)) + + // DEBUG: Save step 11 - trimmed digit displays visualization + trimmedVis := gocv.NewMat() + gocv.CvtColor(scaled, &trimmedVis, gocv.ColorGrayToBGR) + for _, box := range digitDisplays { + gocv.Rectangle(&trimmedVis, box, color.RGBA{0, 255, 0, 255}, 4) + } + gocv.IMWrite("test_output/layout_step11_trimmed_displays.png", trimmedVis) + trimmedVis.Close() + logMessage(LogFile, Debug, " [DEBUG] Saved layout_step11_trimmed_displays.png") + + if len(digitDisplays) < 2 { + // Save debug visualization showing what was found + debugVis := gocv.NewMat() + gocv.CvtColor(scaled, &debugVis, 
gocv.ColorGrayToBGR) + + // Draw 50% line + gocv.Line(&debugVis, image.Pt(scaledMinX, line50), image.Pt(scaledMaxX, line50), color.RGBA{255, 0, 0, 255}, 2) + gocv.PutText(&debugVis, fmt.Sprintf("50%% line: Y=%d", line50), + image.Pt(10, line50-10), gocv.FontHersheyPlain, 1.5, color.RGBA{255, 0, 0, 255}, 2) + + // Draw all center boxes with labels + for i, box := range centerBoxes { + boxColor := color.RGBA{0, 255, 255, 255} // Cyan for rejected + label := fmt.Sprintf("#%d: H=%d", i, box.Dy()) + + // Check if this box qualified as digit display + qualified := false + for _, dd := range digitDisplays { + if box == dd { + qualified = true + break + } + } + + if qualified { + boxColor = color.RGBA{0, 255, 0, 255} // Green for qualified + label += " OK" + } else if box.Dy() < 110 { + label += " TOO SHORT" + } + + gocv.Rectangle(&debugVis, box, boxColor, 3) + gocv.PutText(&debugVis, label, + image.Pt(box.Min.X+5, box.Min.Y+25), + gocv.FontHersheyPlain, 1.5, boxColor, 2) + } + + // Add summary at top + summary := fmt.Sprintf("Found: %d center boxes, %d digit displays (need 2)", + len(centerBoxes), len(digitDisplays)) + gocv.PutText(&debugVis, summary, + image.Pt(10, 30), gocv.FontHersheyDuplex, 1.0, color.RGBA{255, 255, 255, 255}, 2) + gocv.PutText(&debugVis, "Requirement: Height >= 110px", + image.Pt(10, 60), gocv.FontHersheyDuplex, 1.0, color.RGBA{255, 255, 255, 255}, 2) + + timestamp := time.Now().Format("20060102_150405") + errorFilename := fmt.Sprintf("test_output/layout_error_%s.png", timestamp) + gocv.IMWrite(errorFilename, debugVis) + debugVis.Close() + + // Enhanced error message + logMessage(Both, Error, " ERROR: Only %d digit display(s) found (need 2)", len(digitDisplays)) + logMessage(Both, Error, " ") + logMessage(Both, Error, " LIKELY CAUSE: Pulse oximeter is not centered under camera") + logMessage(Both, Error, " - Device may be at an angle or offset to one side") + logMessage(Both, Error, " - Only one display (SpO2 or HR) is in the center detection 
region") + logMessage(Both, Error, " ") + logMessage(Both, Error, " ACTION REQUIRED: Physically reposition camera or device") + logMessage(Both, Error, " - Move device to be centered and level under camera") + logMessage(Both, Error, " - Or adjust camera angle to capture both displays") + logMessage(Both, Error, " ") + logMessage(Both, Error, " Technical details:") + logMessage(Both, Error, " Center boxes found: %d (crossing 50%% line at Y=%d)", len(centerBoxes), line50) + for i, box := range centerBoxes { + qualified := "" + for _, dd := range digitDisplays { + if box == dd { + qualified = " āœ“ QUALIFIED" + break + } + } + if qualified == "" { + if box.Dy() < 110 { + qualified = " āœ— TOO SHORT (< 110px)" + } + } + logMessage(Both, Error, " Box #%d: W=%dpx, H=%dpx at X=%d-%d, Y=%d-%d%s", + i, box.Dx(), box.Dy(), box.Min.X, box.Max.X, box.Min.Y, box.Max.Y, qualified) + } + logMessage(Both, Error, " šŸ’¾ Debug image saved: %s", errorFilename) + logMessage(Both, Error, " Green boxes = qualified, Cyan = rejected") + return nil, 0, nil + } + + // STEP 11: Get Y range from digit displays + minCenterY := digitDisplays[0].Min.Y + maxCenterY := digitDisplays[0].Max.Y + + for _, box := range digitDisplays { + if box.Min.Y < minCenterY { + minCenterY = box.Min.Y + } + if box.Max.Y > maxCenterY { + maxCenterY = box.Max.Y + } + } + + // Create center region + centerRegion := image.Rect(scaledBoundingBox.Min.X, minCenterY, scaledBoundingBox.Max.X, maxCenterY) + + // STEP 12: Find X-center to split left/right + centerX := centerRegion.Min.X + centerRegion.Dx()/2 + logMessage(LogFile, Debug, " Center X: %d", centerX) + + // STEP 13: Find rightmost X and Y range in each half + spo2RightX := -1 + spo2MinY := 10000 + spo2MaxY := 0 + hrRightX := -1 + hrMinY := 10000 + hrMaxY := 0 + + for _, box := range digitDisplays { + // Determine which half this box belongs to based on its center + boxCenterX := box.Min.X + box.Dx()/2 + + if boxCenterX < centerX { + // Left half (SpO2) + if 
box.Max.X > spo2RightX { + spo2RightX = box.Max.X + } + if box.Min.Y < spo2MinY { + spo2MinY = box.Min.Y + } + if box.Max.Y > spo2MaxY { + spo2MaxY = box.Max.Y + } + } else { + // Right half (HR) + if box.Max.X > hrRightX { + hrRightX = box.Max.X + } + if box.Min.Y < hrMinY { + hrMinY = box.Min.Y + } + if box.Max.Y > hrMaxY { + hrMaxY = box.Max.Y + } + } + } + + if spo2RightX == -1 || hrRightX == -1 { + logMessage(Both, Error, " ERROR: Could not find displays in both halves") + logMessage(Both, Error, " Left half (SpO2): rightX=%d, Right half (HR): rightX=%d", spo2RightX, hrRightX) + return nil, 0, nil + } + + // STEP 14: Create boxes with fixed CUT_WIDTH + spo2LeftX := spo2RightX - CUT_WIDTH + spo2Box := image.Rect(spo2LeftX, spo2MinY, spo2RightX, spo2MaxY) + + hrLeftX := hrRightX - CUT_WIDTH + hrBox := image.Rect(hrLeftX, hrMinY, hrRightX, hrMaxY) + + logMessage(LogFile, Debug, " Final SpO2 box: X=%d-%d, Y=%d-%d", spo2Box.Min.X, spo2Box.Max.X, spo2Box.Min.Y, spo2Box.Max.Y) + logMessage(LogFile, Debug, " Final HR box: X=%d-%d, Y=%d-%d", hrBox.Min.X, hrBox.Max.X, hrBox.Min.Y, hrBox.Max.Y) + + // DEBUG: Save step 15 - final boxes visualization + finalVis := gocv.NewMat() + gocv.CvtColor(scaled, &finalVis, gocv.ColorGrayToBGR) + gocv.Rectangle(&finalVis, spo2Box, color.RGBA{255, 0, 0, 255}, 4) + gocv.Rectangle(&finalVis, hrBox, color.RGBA{0, 255, 255, 255}, 4) + gocv.PutText(&finalVis, "SpO2", image.Pt(spo2Box.Min.X, spo2Box.Min.Y-10), + gocv.FontHersheyDuplex, 1.2, color.RGBA{255, 0, 0, 255}, 2) + gocv.PutText(&finalVis, "HR", image.Pt(hrBox.Min.X, hrBox.Min.Y-10), + gocv.FontHersheyDuplex, 1.2, color.RGBA{0, 255, 255, 255}, 2) + gocv.IMWrite("test_output/layout_step15_final_boxes.png", finalVis) + finalVis.Close() + logMessage(LogFile, Debug, " [DEBUG] Saved layout_step15_final_boxes.png") + + logMessage(LogFile, Debug, " [TIMING] Total layout detection: %dms", time.Since(startTime).Milliseconds()) + + // Return layout and scale factor + return &ScreenLayout{ + 
SpO2Area: spo2Box, + HRArea: hrBox, + }, scale, nil +} + +// Old functions kept for backward compatibility but deprecated +func groupAdjacentContours(rects []image.Rectangle) []Display { + var displays []Display + used := make([]bool, len(rects)) + + logMessage(LogFile, Debug, " Grouping %d candidate contours:", len(rects)) + for i := 0; i < len(rects); i++ { + if used[i] { + continue + } + + // Look for an adjacent rect with similar Y coordinate + var adjacent *int + for j := i + 1; j < len(rects); j++ { + if used[j] { + continue + } + + // Check if Y coordinates are similar (within 20 pixels) + yDiff := rects[i].Min.Y - rects[j].Min.Y + if yDiff < 0 { + yDiff = -yDiff + } + if yDiff > 20 { + logMessage(LogFile, Debug, " Skip pairing rect[%d] and rect[%d]: Y diff=%d > 20", i, j, yDiff) + continue + } + + // Check if they're side-by-side (allow small overlap or gap) + xGap := rects[j].Min.X - rects[i].Max.X + // Allow up to 50 pixels of overlap (negative gap) or up to 200 pixels of space + if xGap >= -50 && xGap < 200 { + logMessage(LogFile, Debug, " Pairing rect[%d] and rect[%d]: Y diff=%d, X gap=%d", i, j, yDiff, xGap) + adjacent = &j + break + } else { + logMessage(LogFile, Debug, " Skip pairing rect[%d] and rect[%d]: X gap=%d not in range [-50, 200)", i, j, xGap) + } + } + + if adjacent != nil { + // Two separate contours - use them as left and right + used[i] = true + used[*adjacent] = true + + left := rects[i] + right := rects[*adjacent] + + // Ensure left is actually on the left + if left.Min.X > right.Min.X { + left, right = right, left + } + + logMessage(LogFile, Debug, " DUAL: [%dx%d @(%d,%d)] + [%dx%d @(%d,%d)]", + left.Dx(), left.Dy(), left.Min.X, left.Min.Y, + right.Dx(), right.Dy(), right.Min.X, right.Min.Y) + + displays = append(displays, Display{ + IsSplit: false, + LeftRect: left, + RightRect: right, + }) + } else { + // Single contour - will be split in half + used[i] = true + + logMessage(LogFile, Debug, " SPLIT: [%dx%d @(%d,%d)]", + 
rects[i].Dx(), rects[i].Dy(), rects[i].Min.X, rects[i].Min.Y) + + displays = append(displays, Display{ + IsSplit: true, + FullRect: rects[i], + }) + } + } + + return displays +} + +func detectScreenLayout(rotated gocv.Mat) ([]Display, error) { + gray := gocv.NewMat() + gocv.CvtColor(rotated, &gray, gocv.ColorBGRToGray) + thresh := gocv.NewMat() + gocv.Threshold(gray, &thresh, 170, 255, gocv.ThresholdBinary) + gray.Close() + contours := gocv.FindContours(thresh, gocv.RetrievalExternal, gocv.ChainApproxSimple) + thresh.Close() + + var rects []image.Rectangle + for i := 0; i < contours.Size(); i++ { + rect := gocv.BoundingRect(contours.At(i)) + if rect.Dy() > MIN_BOX_HEIGHT && rect.Dy() < 200 { + rects = append(rects, rect) + } + } + contours.Close() + + sort.Slice(rects, func(i, j int) bool { + return rects[i].Dy() > rects[j].Dy() + }) + + if len(rects) < 3 { + return nil, fmt.Errorf("found only %d contours (need 3+)", len(rects)) + } + + // Take up to 6 candidates to ensure we capture split digits + maxCands := 6 + if len(rects) < maxCands+1 { + maxCands = len(rects) - 1 + } + cands := rects[1 : 1+maxCands] + + // Sort by Y position for processing (Y desc, then X asc) + sort.Slice(cands, func(i, j int) bool { + if cands[i].Min.Y != cands[j].Min.Y { + return cands[i].Min.Y > cands[j].Min.Y + } + return cands[i].Min.X < cands[j].Min.X + }) + + logMessage(LogFile, Debug, " Selected %d candidates (excluding largest):", len(cands)) + for idx, rect := range cands { + logMessage(LogFile, Debug, " [%d]: %dx%d @(%d,%d)", idx, rect.Dx(), rect.Dy(), rect.Min.X, rect.Min.Y) + } + + // Group adjacent contours + displays := groupAdjacentContours(cands) + + logMessage(LogFile, Debug, " Found %d displays after grouping", len(displays)) + + // Group displays by Y position to find the digit row + // Displays within 30 pixels vertically are considered same row + type YGroup struct { + avgY int + displays []Display + } + var yGroups []YGroup + + for _, disp := range displays { + dispY 
:= disp.FullRect.Min.Y + if !disp.IsSplit { + dispY = disp.LeftRect.Min.Y + } + + // Find existing group with similar Y + found := false + for i := range yGroups { + if math.Abs(float64(dispY-yGroups[i].avgY)) < 30 { + yGroups[i].displays = append(yGroups[i].displays, disp) + found = true + break + } + } + if !found { + yGroups = append(yGroups, YGroup{avgY: dispY, displays: []Display{disp}}) + } + } + + logMessage(LogFile, Debug, " Grouped into %d Y-bands:", len(yGroups)) + for idx, group := range yGroups { + logMessage(LogFile, Debug, " Band %d (Y~%d): %d displays", idx, group.avgY, len(group.displays)) + } + + // Find the band with exactly 2 displays (SpO2 and HR) + var digitDisplays []Display + for _, group := range yGroups { + if len(group.displays) == 2 { + digitDisplays = group.displays + break + } + } + + if len(digitDisplays) != 2 { + return nil, fmt.Errorf("could not find Y-band with exactly 2 displays (found %d bands)", len(yGroups)) + } + + // Sort by X position: left = SpO2, right = HR + sort.Slice(digitDisplays, func(i, j int) bool { + xI := digitDisplays[i].FullRect.Min.X + if !digitDisplays[i].IsSplit { + xI = digitDisplays[i].LeftRect.Min.X + } + xJ := digitDisplays[j].FullRect.Min.X + if !digitDisplays[j].IsSplit { + xJ = digitDisplays[j].LeftRect.Min.X + } + return xI < xJ + }) + + return digitDisplays, nil +} + +func saveLayoutVisualization(rotated gocv.Mat, layout *ScreenLayout, filename string) { + visualization := rotated.Clone() + + // Draw SpO2 box in red + red := color.RGBA{255, 0, 0, 255} + gocv.Rectangle(&visualization, layout.SpO2Area, red, 3) + gocv.PutText(&visualization, "SpO2", image.Pt(layout.SpO2Area.Min.X, layout.SpO2Area.Min.Y-10), + gocv.FontHersheyDuplex, 1.2, red, 2) + + // Draw HR box in cyan + cyan := color.RGBA{0, 255, 255, 255} + gocv.Rectangle(&visualization, layout.HRArea, cyan, 3) + gocv.PutText(&visualization, "HR", image.Pt(layout.HRArea.Min.X, layout.HRArea.Min.Y-10), + gocv.FontHersheyDuplex, 1.2, cyan, 2) + + 
gocv.IMWrite(filename, visualization) + visualization.Close() +} diff --git a/backups/backup_20251125/ocr.go b/backups/backup_20251125/ocr.go new file mode 100644 index 0000000..d489316 --- /dev/null +++ b/backups/backup_20251125/ocr.go @@ -0,0 +1,569 @@ +package main + +import ( + "fmt" + "image" + "image/color" + "io" + "os" + "strings" + "time" + + "gocv.io/x/gocv" +) + +func loadTemplates() (map[int][]gocv.Mat, error) { + templates := make(map[int][]gocv.Mat) + files, err := os.ReadDir("training_digits") + if err != nil { + logMessage(Console, Warning, "āš ļø WARNING: training_digits directory not found or cannot be read: %v", err) + logMessage(Console, Warning, "āš ļø Starting with empty template set. Recognition will not work until templates are added.") + logMessage(Console, Info, "") + return templates, nil + } + templateCount := 0 + for _, file := range files { + if !strings.HasSuffix(file.Name(), ".png") { + continue + } + name := strings.TrimSuffix(file.Name(), ".png") + parts := strings.Split(name, "_") + var digit int + _, err := fmt.Sscanf(parts[0], "%d", &digit) + if err != nil || digit < 0 || digit > 9 { + continue + } + filename := fmt.Sprintf("training_digits/%s", file.Name()) + template := gocv.IMRead(filename, gocv.IMReadGrayScale) + if template.Empty() { + logMessage(Console, Warning, "Warning: Failed to load %s", filename) + continue + } + templates[digit] = append(templates[digit], template) + templateCount++ + } + + // Load invalid digit markers (use index -1 for invalid patterns) + invalidFiles, err := os.ReadDir("training_digits/invalid") + if err == nil { + invalidCount := 0 + for _, file := range invalidFiles { + if !strings.HasSuffix(file.Name(), ".png") { + continue + } + filename := fmt.Sprintf("training_digits/invalid/%s", file.Name()) + template := gocv.IMRead(filename, gocv.IMReadGrayScale) + if template.Empty() { + logMessage(Console, Warning, "Warning: Failed to load invalid template %s", filename) + continue + } + 
templates[-1] = append(templates[-1], template) + invalidCount++ + } + if invalidCount > 0 { + logMessage(Console, Info, "āœ“ Loaded %d invalid digit markers", invalidCount) + } + } + + for digit := 0; digit <= 9; digit++ { + if len(templates[digit]) == 0 { + logMessage(Console, Warning, "āš ļø WARNING: No templates found for digit %d", digit) + } + } + logMessage(Console, Info, "āœ“ Loaded %d digit templates (0-9)", templateCount) + return templates, nil +} + +func matchDigit(digitImg gocv.Mat, templates map[int][]gocv.Mat) (int, float64) { + bestDigit := -1 + bestScore := 0.0 + bestInvalidScore := 0.0 + + // Check invalid patterns first (stored at index -1) + invalidTemplates := templates[-1] + for _, template := range invalidTemplates { + compareImg := digitImg + if digitImg.Rows() != template.Rows() || digitImg.Cols() != template.Cols() { + resized := gocv.NewMat() + gocv.Resize(digitImg, &resized, image.Pt(template.Cols(), template.Rows()), 0, 0, gocv.InterpolationLinear) + defer resized.Close() + compareImg = resized + } + diff := gocv.NewMat() + gocv.AbsDiff(compareImg, template, &diff) + totalPixels := compareImg.Rows() * compareImg.Cols() + diffPixels := gocv.CountNonZero(diff) + samePixels := totalPixels - diffPixels + score := (float64(samePixels) / float64(totalPixels)) * 100.0 + diff.Close() + if score > bestInvalidScore { + bestInvalidScore = score + } + } + + // Check regular digits (0-9) + for digit := 0; digit <= 9; digit++ { + digitTemplates := templates[digit] + if len(digitTemplates) == 0 { + // No templates for this digit, skip + continue + } + for _, template := range digitTemplates { + compareImg := digitImg + if digitImg.Rows() != template.Rows() || digitImg.Cols() != template.Cols() { + resized := gocv.NewMat() + gocv.Resize(digitImg, &resized, image.Pt(template.Cols(), template.Rows()), 0, 0, gocv.InterpolationLinear) + defer resized.Close() + compareImg = resized + } + diff := gocv.NewMat() + gocv.AbsDiff(compareImg, template, &diff) + 
totalPixels := compareImg.Rows() * compareImg.Cols() + diffPixels := gocv.CountNonZero(diff) + samePixels := totalPixels - diffPixels + score := (float64(samePixels) / float64(totalPixels)) * 100.0 + diff.Close() + if score > bestScore { + bestScore = score + bestDigit = digit + } + } + } + + // If invalid pattern matches better than any digit (and score > 70%), return -1 + if bestInvalidScore > bestScore && bestInvalidScore > 70 { + return -1, bestInvalidScore + } + + return bestDigit, bestScore +} + +// Helper function: check if there's a '1' digit by cutting at the expected width and matching templates +// ONLY checks against '1' templates to prevent false matches with '4', '7', etc. +func hasOneAt(thresh gocv.Mat, x int, templates map[int][]gocv.Mat, logger io.Writer) bool { + h := thresh.Rows() + w := thresh.Cols() + + // Calculate the region where a '1' would be if it exists + // '1' is 72px wide, so extract from (x - 72) to x + leftEdge := x - DIGIT_ONE_WIDTH + if leftEdge < 0 { + leftEdge = 0 + } + + if x > w { + x = w + } + + logMessage(Both, Debug, " hasOneAt(x=%d): extracting region [%d..%d] (width=%d) from display width=%d", + x, leftEdge, x, x-leftEdge, w) + + // Extract the potential '1' region + region := thresh.Region(image.Rect(leftEdge, 0, x, h)) + digitImg := region.Clone() + region.Close() + + // Match ONLY against '1' templates (don't check other digits) + oneTemplates := templates[1] + if len(oneTemplates) == 0 { + logMessage(LogFile, Warning, " hasOneAt(x=%d): No '1' templates loaded -> NO", x) + digitImg.Close() + return false + } + + bestScore := 0.0 + for _, template := range oneTemplates { + compareImg := digitImg + if digitImg.Rows() != template.Rows() || digitImg.Cols() != template.Cols() { + resized := gocv.NewMat() + gocv.Resize(digitImg, &resized, image.Pt(template.Cols(), template.Rows()), 0, 0, gocv.InterpolationLinear) + defer resized.Close() + compareImg = resized + } + diff := gocv.NewMat() + gocv.AbsDiff(compareImg, template, 
&diff) + totalPixels := compareImg.Rows() * compareImg.Cols() + diffPixels := gocv.CountNonZero(diff) + samePixels := totalPixels - diffPixels + score := (float64(samePixels) / float64(totalPixels)) * 100.0 + diff.Close() + if score > bestScore { + bestScore = score + } + } + digitImg.Close() + + // If it matches '1' with >85% confidence, we have a '1' + if bestScore > 85.0 { + logMessage(Both, Debug, " hasOneAt(x=%d): matched '1' with %.1f%% confidence -> YES", x, bestScore) + return true + } + + logMessage(Both, Debug, " hasOneAt(x=%d): best '1' match %.1f%% confidence -> NO", x, bestScore) + return false +} + +// validateEight checks if a digit image has the two characteristic holes of an '8' +// by checking for black pixels along horizontal lines where holes should be +func validateEight(digitImg gocv.Mat, logger io.Writer) bool { + h := digitImg.Rows() + w := digitImg.Cols() + + // Upper hole: 30% of height + // Lower hole: 70% of height + // Horizontal range: 40%-50% of width + topHoleY := (h * 30) / 100 + bottomHoleY := (h * 70) / 100 + xStart := (w * 40) / 100 + xEnd := (w * 50) / 100 + + // Check for ANY black pixel in the horizontal range + // In thresholded image: black (0) = hole, white (255) = digit + topHoleFound := false + for x := xStart; x <= xEnd; x++ { + pixel := digitImg.GetUCharAt(topHoleY, x) + if pixel < 128 { + topHoleFound = true + break + } + } + + bottomHoleFound := false + for x := xStart; x <= xEnd; x++ { + pixel := digitImg.GetUCharAt(bottomHoleY, x) + if pixel < 128 { + bottomHoleFound = true + break + } + } + + logMessage(LogFile, Debug, " validateEight: top hole @ y=%d (30%%) x=%d-%d (%v), bottom hole @ y=%d (70%%) x=%d-%d (%v)", + topHoleY, xStart, xEnd, topHoleFound, + bottomHoleY, xStart, xEnd, bottomHoleFound) + + return topHoleFound && bottomHoleFound +} + +// validateZero checks if a digit image has the characteristic hole of a '0' +// by checking for black pixels along a horizontal line at the center +func 
validateZero(digitImg gocv.Mat, logger io.Writer) bool { + h := digitImg.Rows() + w := digitImg.Cols() + + // Center hole: 50% of height + // Horizontal range: 30%-70% of width (wider range than '8' since '0' hole is larger) + centerY := h / 2 + xStart := (w * 30) / 100 + xEnd := (w * 70) / 100 + + // Check for ANY black pixel in the horizontal range + // In thresholded image: black (0) = hole, white (255) = digit + holeFound := false + for x := xStart; x <= xEnd; x++ { + pixel := digitImg.GetUCharAt(centerY, x) + if pixel < 128 { + holeFound = true + break + } + } + + logMessage(LogFile, Debug, " validateZero: center hole @ y=%d (50%%) x=%d-%d (%v)", + centerY, xStart, xEnd, holeFound) + + return holeFound +} + +// New simplified version that works with fixed-width rectangles +func recognizeDisplayArea(rotated gocv.Mat, area image.Rectangle, templates map[int][]gocv.Mat, displayName string, frameCount int, logger io.Writer) (int, int, float64, int, float64) { + startTime := time.Now() + cleanName := strings.TrimSpace(strings.ToLower(displayName)) + + // Extract the display area (ALREADY THRESHOLDED BINARY IMAGE) + region := rotated.Region(area) + thresh := region.Clone() + region.Close() + + // DEBUG: Save extracted region - already thresholded + if DEBUG_MODE { + step1Filename := fmt.Sprintf("review/f%d_%s_step1_extracted.png", frameCount, cleanName) + gocv.IMWrite(step1Filename, thresh) + } + + w := thresh.Cols() + h := thresh.Rows() + + // Log display dimensions + logMessage(LogFile, Debug, " %s display: w=%d, h=%d", displayName, w, h) + + // Sequential check for '1' patterns to determine digit widths + // hasOneAt checks if column X is white and column X+10 is black (detects RIGHT EDGE where '1' ends) + + // Digit detection (right to left: digit3, digit2, digit1) + // D stores cut positions, digitIsOne stores detection results + D := make([]int, 4) // indices 1,2,3 (0 unused) + digitIsOne := make([]bool, 4) // indices 1,2,3 (0 unused) + + totalWidth := 0 + + 
// recognizeDisplayArea OCRs one display (SpO2 or HR) from the
// already-thresholded, rotated frame, restricted to `area`.
//
// Strategy (right to left): hasOneAt probes whether each of the three
// possible digit positions ends in a '1' (narrow, DIGIT_ONE_WIDTH) or
// not (wide, DIGIT_NON_ONE_WIDTH), accumulating fixed per-digit
// widths to derive the cut positions D[1..3]. The three crops are
// template-matched; '8' and '0' results are double-checked by hole
// validation; finally the whole number is assembled, with a leading 1
// when digit position 1 matched a '1'.
//
// Returns (number, num2, score2, num3, score3): the assembled value
// plus the middle/right digit results, kept for compatibility with
// existing callers. NOTE(review): a digit that fails validation is
// set to -1, which makes `number` arithmetic meaningless for that
// frame; presumably callers detect this via the negative digit
// fields — confirm upstream.
//
// Side effects: writes several review/debug PNGs (the _checks and
// _full images always; per-digit crops only when SAVE_CROPS; the raw
// extracted region only in DEBUG_MODE).
func recognizeDisplayArea(rotated gocv.Mat, area image.Rectangle, templates map[int][]gocv.Mat, displayName string, frameCount int, logger io.Writer) (int, int, float64, int, float64) {
	startTime := time.Now()
	cleanName := strings.TrimSpace(strings.ToLower(displayName))

	// Extract the display area (ALREADY THRESHOLDED BINARY IMAGE)
	region := rotated.Region(area)
	thresh := region.Clone()
	region.Close()

	// DEBUG: Save extracted region - already thresholded
	if DEBUG_MODE {
		step1Filename := fmt.Sprintf("review/f%d_%s_step1_extracted.png", frameCount, cleanName)
		gocv.IMWrite(step1Filename, thresh)
	}

	w := thresh.Cols()
	h := thresh.Rows()

	// Log display dimensions
	logMessage(LogFile, Debug, " %s display: w=%d, h=%d", displayName, w, h)

	// Sequential check for '1' patterns to determine digit widths
	// hasOneAt checks if column X is white and column X+10 is black (detects RIGHT EDGE where '1' ends)

	// Digit detection (right to left: digit3, digit2, digit1)
	// D stores cut positions, digitIsOne stores detection results
	D := make([]int, 4)           // indices 1,2,3 (0 unused)
	digitIsOne := make([]bool, 4) // indices 1,2,3 (0 unused)

	totalWidth := 0

	logMessage(Both, Debug, " [%s] Display width=%d, starting digit detection...", displayName, w)

	detectionStart := time.Now()
	for i := 3; i >= 1; i-- {
		// Rightmost digit is probed at the display's right edge; the
		// others at the running left edge (minus a 5px fudge).
		var checkPos int
		if i == 3 {
			checkPos = w // Check at right edge, not w-8
		} else {
			checkPos = w - totalWidth - 5
		}

		logMessage(Both, Debug, " [%s] Digit %d: checkPos=%d (w=%d, totalWidth=%d)", displayName, i, checkPos, w, totalWidth)
		digitIsOne[i] = hasOneAt(thresh, checkPos, templates, logger)

		// Add width to accumulator
		var widthUsed int
		if digitIsOne[i] {
			widthUsed = DIGIT_ONE_WIDTH
			totalWidth += DIGIT_ONE_WIDTH
			logMessage(Both, Debug, " [%s] Digit %d: IS ONE -> using width=%d (totalWidth now %d)", displayName, i, widthUsed, totalWidth)
		} else {
			widthUsed = DIGIT_NON_ONE_WIDTH
			totalWidth += DIGIT_NON_ONE_WIDTH
			logMessage(Both, Debug, " [%s] Digit %d: NOT ONE -> using width=%d (totalWidth now %d)", displayName, i, widthUsed, totalWidth)
		}

		// D[i] is the left edge (cut position) of digit i.
		D[i] = w - totalWidth
		logMessage(Both, Debug, " [%s] Digit %d: CUT position D[%d]=%d", displayName, i, i, D[i])
	}
	logMessage(LogFile, Debug, " [TIMING][%s] Digit detection (hasOneAt x3): %dms", displayName, time.Since(detectionStart).Milliseconds())

	logMessage(Both, Debug, " [%s] Final cut positions: D[1]=%d, D[2]=%d, D[3]=%d", displayName, D[1], D[2], D[3])

	// Create visualization image showing check positions AND cut positions
	visImg := gocv.NewMat()
	gocv.CvtColor(thresh, &visImg, gocv.ColorGrayToBGR)

	// Draw vertical lines at check positions with labels
	greenColor := color.RGBA{0, 255, 0, 255}    // Green for '1' detected
	redColor := color.RGBA{0, 0, 255, 255}      // Red for no '1'
	yellowColor := color.RGBA{0, 255, 255, 255} // Yellow for CUT lines

	// Digit 3 check line
	// NOTE(review): drawn at w-8 although the actual probe above used
	// checkPos=w — the marker is slightly left of the real check.
	digit3Color := redColor
	if digitIsOne[3] {
		digit3Color = greenColor
	}
	gocv.Line(&visImg, image.Pt(w-8, 0), image.Pt(w-8, h), digit3Color, 2)
	gocv.PutText(&visImg, "D3", image.Pt(w-8+5, 20), gocv.FontHersheyDuplex, 0.5, digit3Color, 1)

	// Digit 2 check line (calculate position from accumulated width)
	var digit2Width int
	if digitIsOne[3] {
		digit2Width = DIGIT_ONE_WIDTH
	} else {
		digit2Width = DIGIT_NON_ONE_WIDTH
	}
	digit2CheckPos := w - digit2Width - 5
	digit2Color := redColor
	if digitIsOne[2] {
		digit2Color = greenColor
	}
	gocv.Line(&visImg, image.Pt(digit2CheckPos, 0), image.Pt(digit2CheckPos, h), digit2Color, 2)
	gocv.PutText(&visImg, "D2", image.Pt(digit2CheckPos+5, 40), gocv.FontHersheyDuplex, 0.5, digit2Color, 1)

	// Digit 1 check line (calculate position from accumulated width)
	var digit3Width int
	if digitIsOne[3] {
		digit3Width = DIGIT_ONE_WIDTH
	} else {
		digit3Width = DIGIT_NON_ONE_WIDTH
	}
	var digit2WidthForD1 int
	if digitIsOne[2] {
		digit2WidthForD1 = DIGIT_ONE_WIDTH
	} else {
		digit2WidthForD1 = DIGIT_NON_ONE_WIDTH
	}
	digit1CheckPos := w - digit3Width - digit2WidthForD1 - 5
	digit1Color := redColor
	if digitIsOne[1] {
		digit1Color = greenColor
	}
	gocv.Line(&visImg, image.Pt(digit1CheckPos, 0), image.Pt(digit1CheckPos, h), digit1Color, 2)
	gocv.PutText(&visImg, "D1", image.Pt(digit1CheckPos+5, 60), gocv.FontHersheyDuplex, 0.5, digit1Color, 1)

	// Draw CUT lines in yellow showing where we actually split the digits
	gocv.Line(&visImg, image.Pt(D[2], 0), image.Pt(D[2], h), yellowColor, 3)
	gocv.PutText(&visImg, "CUT1", image.Pt(D[2]+5, h-10), gocv.FontHersheyDuplex, 0.6, yellowColor, 2)

	gocv.Line(&visImg, image.Pt(D[3], 0), image.Pt(D[3], h), yellowColor, 3)
	gocv.PutText(&visImg, "CUT2", image.Pt(D[3]+5, h-30), gocv.FontHersheyDuplex, 0.6, yellowColor, 2)

	// Save visualization (always - needed for debugging failures)
	visFilename := fmt.Sprintf("review/f%d_%s_checks.png", frameCount, cleanName)
	gocv.IMWrite(visFilename, visImg)
	visImg.Close()

	// Clamp cut positions to valid range
	if D[3] < 0 {
		D[3] = 0
	}
	if D[2] < 0 {
		D[2] = 0
	}

	// Extract three digit regions
	logMessage(Both, Debug, " [%s] Extracting digit1: region [0..%d] (width=%d)", displayName, D[2], D[2])
	digit1Region := thresh.Region(image.Rect(0, 0, D[2], h))
	digit1 := digit1Region.Clone()
	digit1Region.Close()

	logMessage(Both, Debug, " [%s] Extracting digit2: region [%d..%d] (width=%d)", displayName, D[2], D[3], D[3]-D[2])
	digit2Region := thresh.Region(image.Rect(D[2], 0, D[3], h))
	digit2 := digit2Region.Clone()
	digit2Region.Close()

	logMessage(Both, Debug, " [%s] Extracting digit3: region [%d..%d] (width=%d)", displayName, D[3], w, w-D[3])
	digit3Region := thresh.Region(image.Rect(D[3], 0, w, h))
	digit3 := digit3Region.Clone()
	digit3Region.Close()

	// Match all three digits
	matchStart := time.Now()
	num1, score1 := matchDigit(digit1, templates)
	num2, score2 := matchDigit(digit2, templates)
	num3, score3 := matchDigit(digit3, templates)
	logMessage(LogFile, Debug, " [TIMING][%s] matchDigit x3: %dms", displayName, time.Since(matchStart).Milliseconds())

	// Validate '8' digits - check for two holes
	if num1 == 8 && !validateEight(digit1, logger) {
		logMessage(LogFile, Debug, " āš ļø Digit1 recognized as 8 but failed hole validation - marking invalid")
		num1 = -1
		score1 = 0.0
	}
	if num2 == 8 && !validateEight(digit2, logger) {
		logMessage(LogFile, Debug, " āš ļø Digit2 recognized as 8 but failed hole validation - marking invalid")
		num2 = -1
		score2 = 0.0
	}
	if num3 == 8 && !validateEight(digit3, logger) {
		logMessage(LogFile, Debug, " āš ļø Digit3 recognized as 8 but failed hole validation - marking invalid")
		num3 = -1
		score3 = 0.0
	}

	// Validate '0' digits - check for center hole
	if num1 == 0 && !validateZero(digit1, logger) {
		logMessage(LogFile, Debug, " āš ļø Digit1 recognized as 0 but failed hole validation - marking invalid")
		num1 = -1
		score1 = 0.0
	}
	if num2 == 0 && !validateZero(digit2, logger) {
		logMessage(LogFile, Debug, " āš ļø Digit2 recognized as 0 but failed hole validation - marking invalid")
		num2 = -1
		score2 = 0.0
	}
	if num3 == 0 && !validateZero(digit3, logger) {
		logMessage(LogFile, Debug, " āš ļø Digit3 recognized as 0 but failed hole validation - marking invalid")
		num3 = -1
		score3 = 0.0
	}

	// Calculate final number
	// The leading digit is trusted from the width probe (digitIsOne),
	// not from template matching of the digit1 crop.
	var number int
	if digitIsOne[1] {
		// 3-digit number: 1XX
		number = 100 + num2*10 + num3
	} else {
		// 2-digit number: XX (digit1 region is mostly empty/padding)
		number = num2*10 + num3
	}

	// Save individual digits (only when SAVE_CROPS flag is set - for approving templates)
	if SAVE_CROPS {
		saveStart := time.Now()
		digit1Filename := fmt.Sprintf("review/f%d_%s_digit1.png", frameCount, cleanName)
		digit2Filename := fmt.Sprintf("review/f%d_%s_digit2.png", frameCount, cleanName)
		digit3Filename := fmt.Sprintf("review/f%d_%s_digit3.png", frameCount, cleanName)
		gocv.IMWrite(digit1Filename, digit1)
		gocv.IMWrite(digit2Filename, digit2)
		gocv.IMWrite(digit3Filename, digit3)
		logMessage(LogFile, Debug, " [TIMING][%s] Save digit images: %dms", displayName, time.Since(saveStart).Milliseconds())
	}

	// Create labeled full image (combine all three digits)
	combinedWidth := digit1.Cols() + digit2.Cols() + digit3.Cols()
	combinedHeight := digit1.Rows()
	if digit2.Rows() > combinedHeight {
		combinedHeight = digit2.Rows()
	}
	if digit3.Rows() > combinedHeight {
		combinedHeight = digit3.Rows()
	}

	fullForLabel := gocv.NewMatWithSize(combinedHeight, combinedWidth, digit1.Type())

	// Copy digit1
	digit1ROI := fullForLabel.Region(image.Rect(0, 0, digit1.Cols(), digit1.Rows()))
	digit1.CopyTo(&digit1ROI)
	digit1ROI.Close()

	// Copy digit2
	digit2ROI := fullForLabel.Region(image.Rect(digit1.Cols(), 0, digit1.Cols()+digit2.Cols(), digit2.Rows()))
	digit2.CopyTo(&digit2ROI)
	digit2ROI.Close()

	// Copy digit3
	digit3ROI := fullForLabel.Region(image.Rect(digit1.Cols()+digit2.Cols(), 0, combinedWidth, digit3.Rows()))
	digit3.CopyTo(&digit3ROI)
	digit3ROI.Close()

	// Add padding at top for label
	labeledFull := gocv.NewMat()
	gocv.CopyMakeBorder(fullForLabel, &labeledFull, 60, 0, 0, 0, gocv.BorderConstant, color.RGBA{0, 0, 0, 255})
	fullForLabel.Close()

	// Draw label
	label := fmt.Sprintf("%d", number)
	textColor := color.RGBA{255, 255, 255, 255}
	gocv.PutText(&labeledFull, label, image.Pt(10, 45), gocv.FontHersheyDuplex, 1.8, textColor, 3)

	// Save labeled full image (always - needed for debugging failures)
	fullFilename := fmt.Sprintf("review/f%d_%s_full.png", frameCount, cleanName)
	gocv.IMWrite(fullFilename, labeledFull)
	labeledFull.Close()

	digit1.Close()
	digit2.Close()
	digit3.Close()
	thresh.Close()

	// Calculate widths for logging
	// (reuses the visualization variables declared above, hence `=`)
	digit3Width = DIGIT_ONE_WIDTH
	if !digitIsOne[3] {
		digit3Width = DIGIT_NON_ONE_WIDTH
	}
	digit2Width = DIGIT_ONE_WIDTH
	if !digitIsOne[2] {
		digit2Width = DIGIT_NON_ONE_WIDTH
	}

	// Print debug info to log only
	if digitIsOne[1] {
		logMessage(LogFile, Info, " %s: 1%d%d [3-DIGIT: d1=1@%dpx (x=0-%d), d2=%s@%dpx (x=%d-%d), d3=%s@%dpx (x=%d-%d)] (scores: %d=%.0f%%, %d=%.0f%%, %d=%.0f%%)",
			displayName, num2, num3, D[2], D[2],
			map[bool]string{true: "1", false: "X"}[digitIsOne[2]], digit2Width, D[2], D[3],
			map[bool]string{true: "1", false: "X"}[digitIsOne[3]], digit3Width, D[3], w,
			num1, score1, num2, score2, num3, score3)
	} else {
		logMessage(LogFile, Info, " %s: %d%d [2-DIGIT: d2=%s@%dpx (x=%d-%d), d3=%s@%dpx (x=%d-%d)] (scores: %d=%.0f%%, %d=%.0f%%)",
			displayName, num2, num3,
			map[bool]string{true: "1", false: "X"}[digitIsOne[2]], digit2Width, D[2], D[3],
			map[bool]string{true: "1", false: "X"}[digitIsOne[3]], digit3Width, D[3], w,
			num2, score2, num3, score3)
	}

	logMessage(LogFile, Debug, " [TIMING][%s] Total recognizeDisplayArea: %dms", displayName, time.Since(startTime).Milliseconds())

	// Return using middle two digits for compatibility with existing code
	return number, num2, score2, num3, score3
}
a/backups/backup_20251125/processor.go b/backups/backup_20251125/processor.go new file mode 100644 index 0000000..ad7af8a --- /dev/null +++ b/backups/backup_20251125/processor.go @@ -0,0 +1,299 @@ +package main + +import ( + "fmt" + "image" + "io" + "time" + + "gocv.io/x/gocv" +) + +// Processor handles the frame processing pipeline +type Processor struct { + templates map[int][]gocv.Mat + config *Config + logger io.Writer +} + +// NewProcessor creates a new processor +func NewProcessor(templates map[int][]gocv.Mat, config *Config, logger io.Writer) *Processor { + return &Processor{ + templates: templates, + config: config, + logger: logger, + } +} + +// buildReviewEntry creates a ReviewEntry from a Reading +func buildReviewEntry(reading *Reading, failed bool, failureReason string, unstable bool, unstableReason string) ReviewEntry { + return ReviewEntry{ + FrameNum: reading.FrameNum, + Timestamp: reading.Timestamp, + SpO2Value: reading.SpO2, + SpO2LeftDigit: reading.SpO2LeftDigit, + SpO2LeftConf: reading.SpO2LeftConf, + SpO2RightDigit: reading.SpO2RightDigit, + SpO2RightConf: reading.SpO2RightConf, + HRValue: reading.HR, + HRLeftDigit: reading.HRLeftDigit, + HRLeftConf: reading.HRLeftConf, + HRRightDigit: reading.HRRightDigit, + HRRightConf: reading.HRRightConf, + Failed: failed, + FailureReason: failureReason, + Unstable: unstable, + UnstableReason: unstableReason, + } +} + +// handleFrameFailure handles all failed frame processing: +// - Saves debug files (raw frame + layout visualization) +// - Creates and appends review entry +// - Calculates total timing +// - Returns ProcessingResult with given status +func handleFrameFailure( + rawFrame gocv.Mat, + normalized gocv.Mat, + layout *ScreenLayout, + reading *Reading, + status ProcessingStatus, + failureReason string, + state *ProcessingState, + timing *TimingData, + frameStartTime time.Time, +) ProcessingResult { + // Save debug files + fileIOStart := time.Now() + saveThresholdedFrame(rawFrame, reading.FrameNum) 
+ saveLayoutVisualization(normalized, layout, fmt.Sprintf("review/f%d_boxes.jpg", reading.FrameNum)) + timing.FileIO = time.Since(fileIOStart).Milliseconds() + + // Create and append review entry + entry := buildReviewEntry(reading, true, failureReason, false, "") + state.ReviewEntries = append(state.ReviewEntries, entry) + if err := appendReviewEntry(entry); err != nil { + logMessage(LogFile, Warning, " Warning: Could not append to review HTML: %v", err) + } + + // Calculate total timing + timing.Total = time.Since(frameStartTime).Milliseconds() + + return ProcessingResult{ + Status: status, + Reading: *reading, + } +} + +// processFrame runs OCR on a normalized frame with valid layout +// rawFrame is the untouched frame from source (used for saving on errors) +func (p *Processor) processFrame(frame gocv.Mat, rawFrame gocv.Mat, frameNum int, state *ProcessingState, timing *TimingData) ProcessingResult { + timestamp := time.Now().Format("15:04:05.000") + frameStartTime := time.Now() + + logMessage(LogFile, Info, "Frame #%d", frameNum) + + // Apply scaling if needed + scaleStart := time.Now() + var normalized gocv.Mat + if state.LockedScale != 1.0 { + newWidth := int(float64(frame.Cols()) * state.LockedScale) + newHeight := int(float64(frame.Rows()) * state.LockedScale) + normalized = gocv.NewMat() + gocv.Resize(frame, &normalized, image.Pt(newWidth, newHeight), 0, 0, gocv.InterpolationLinear) + defer normalized.Close() + logMessage(LogFile, Debug, " Applied scaling: %dx%d -> %dx%d (scale: %.3f)", + frame.Cols(), frame.Rows(), newWidth, newHeight, state.LockedScale) + } else { + normalized = frame + } + timing.Scale = time.Since(scaleStart).Milliseconds() + + // Run OCR + logMessage(LogFile, Info, " Recognizing displays...") + + spo2Start := time.Now() + spo2Val, spo2Left, spo2LeftConf, spo2Right, spo2RightConf := recognizeDisplayArea( + normalized, state.Layout.SpO2Area, p.templates, "SpO2", frameNum, p.logger) + timing.OCR_SpO2 = 
time.Since(spo2Start).Milliseconds() + + hrStart := time.Now() + hrVal, hrLeft, hrLeftConf, hrRight, hrRightConf := recognizeDisplayArea( + normalized, state.Layout.HRArea, p.templates, "HR", frameNum, p.logger) + timing.OCR_HR = time.Since(hrStart).Milliseconds() + + reading := Reading{ + SpO2: spo2Val, + SpO2LeftDigit: spo2Left, + SpO2LeftConf: spo2LeftConf, + SpO2RightDigit: spo2Right, + SpO2RightConf: spo2RightConf, + HR: hrVal, + HRLeftDigit: hrLeft, + HRLeftConf: hrLeftConf, + HRRightDigit: hrRight, + HRRightConf: hrRightConf, + Timestamp: timestamp, + FrameNum: frameNum, + } + + // HANDLER #1: Check for corruption - ANY -1 digit means invalid/corrupted + if reading.IsCorrupted() { + validationStart := time.Now() + timing.Validation += time.Since(validationStart).Milliseconds() + + logMessage(LogFile, Warning, " Frame #%d: Corruption detected - %s matched invalid pattern (marked as -1)", + frameNum, reading.GetCorruptionDetails()) + logMessage(LogFile, Info, " [CORRUPTION] Frame #%d - Digit = -1 detected: SpO2(%d,%d) HR(%d,%d) - skipping frame", + frameNum, reading.SpO2LeftDigit, reading.SpO2RightDigit, reading.HRLeftDigit, reading.HRRightDigit) + + // Save extra debug files in DEBUG_MODE + if DEBUG_MODE { + debugTimestamp := time.Now().Format("20060102_150405") + gocv.IMWrite(fmt.Sprintf("test_output/corruption_%s_frame%d_normalized.png", debugTimestamp, frameNum), normalized) + saveLayoutVisualization(normalized, state.Layout, fmt.Sprintf("test_output/corruption_%s_frame%d_layout.jpg", debugTimestamp, frameNum)) + logMessage(LogFile, Info, " šŸ’¾ Debug saved: %s", debugTimestamp) + } + + return handleFrameFailure(rawFrame, normalized, state.Layout, &reading, StatusCorrupted, + fmt.Sprintf("Corruption: %s", reading.GetCorruptionDetails()), state, timing, frameStartTime) + } + + // HANDLER #2: Check for unrecognized digits (negative values) + if reading.SpO2 < 0 || reading.HR < 0 { + validationStart := time.Now() + timing.Validation += 
time.Since(validationStart).Milliseconds() + + logMessage(LogFile, Info, " [UNRECOGNIZED] SpO2=%d, HR=%d - treating as low confidence", reading.SpO2, reading.HR) + + return handleFrameFailure(rawFrame, normalized, state.Layout, &reading, StatusLowConfidence, + fmt.Sprintf("Unrecognized: SpO2=%d, HR=%d", reading.SpO2, reading.HR), state, timing, frameStartTime) + } + + // Check if values changed + validationStart := time.Now() + valuesChanged := (reading.SpO2 != state.LastPosted.SpO2 || reading.HR != state.LastPosted.HR) + timing.Validation += time.Since(validationStart).Milliseconds() + + if !valuesChanged { + timing.Total = time.Since(frameStartTime).Milliseconds() + logMessage(LogFile, Debug, " Frame #%d: SpO2=%d%%, HR=%d bpm (no change) - processed in %dms", + frameNum, reading.SpO2, reading.HR, timing.Total) + return ProcessingResult{Status: StatusNoChange, Reading: reading} + } + + // Values changed - update state and log + state.LastPosted = reading + logMessage(Both, Info, "SpO2=%d%%, HR=%d bpm", reading.SpO2, reading.HR) + + // HANDLER #3: Check confidence + validationStart = time.Now() + action, _ := validateConfidence(&reading, state, p.logger) + timing.Validation += time.Since(validationStart).Milliseconds() + + if action == ActionRetry { + state.LowConfidenceCount++ + logMessage(Both, Warning, " Low confidence (#%d) - grabbing next frame immediately...", state.LowConfidenceCount) + + spo2Avg, hrAvg := reading.AvgConfidence() + return handleFrameFailure(rawFrame, normalized, state.Layout, &reading, StatusLowConfidence, + fmt.Sprintf("Low confidence: SpO2 %.1f%%, HR %.1f%%", spo2Avg, hrAvg), state, timing, frameStartTime) + } + + if action == ActionDiscard { + state.LowConfidenceCount++ + logMessage(Both, Warning, " Low confidence after retry (#%d)", state.LowConfidenceCount) + + spo2Avg, hrAvg := reading.AvgConfidence() + return handleFrameFailure(rawFrame, normalized, state.Layout, &reading, StatusLowConfidence, + fmt.Sprintf("Low confidence: SpO2 
%.1f%%, HR %.1f%%", spo2Avg, hrAvg), state, timing, frameStartTime) + } + + // HANDLER #4: Check stability + validationStart = time.Now() + action, reason := validateStability(&reading, state, p.logger) + timing.Validation += time.Since(validationStart).Milliseconds() + + // Check for physiologically impossible values (< 40) + if reading.SpO2 < 40 || reading.HR < 40 { + timing.HASS = 0 + logMessage(Both, Warning, " Invalid physiological values: SpO2=%d, HR=%d (below 40) - not posting to HASS", + reading.SpO2, reading.HR) + + entry := buildReviewEntry(&reading, false, "", false, "") + state.ReviewEntries = append(state.ReviewEntries, entry) + if err := appendReviewEntry(entry); err != nil { + logMessage(LogFile, Warning, " Warning: Could not append to review HTML: %v", err) + } + + timing.Total = time.Since(frameStartTime).Milliseconds() + return ProcessingResult{Status: StatusSuccess, Reading: reading, ShouldPost: false} + } + + // Handle stability action + if action == ActionHold { + timing.HASS = 0 + entry := buildReviewEntry(&reading, false, "", true, reason) + state.ReviewEntries = append(state.ReviewEntries, entry) + if err := appendReviewEntry(entry); err != nil { + logMessage(LogFile, Warning, " Warning: Could not append to review HTML: %v", err) + } + + logMessage(LogFile, Warning, " %s - holding for validation", reason) + timing.Total = time.Since(frameStartTime).Milliseconds() + return ProcessingResult{Status: StatusUnstable, Reading: reading, ShouldPost: false, UnstableReason: reason} + } + + // SUCCESS - ActionPost or default + timing.HASS = 0 // Will be updated in main loop + entry := buildReviewEntry(&reading, false, "", false, "") + state.ReviewEntries = append(state.ReviewEntries, entry) + if err := appendReviewEntry(entry); err != nil { + logMessage(LogFile, Warning, " Warning: Could not append to review HTML: %v", err) + } + + timing.Total = time.Since(frameStartTime).Milliseconds() + return ProcessingResult{Status: StatusSuccess, Reading: 
reading, ShouldPost: action == ActionPost} +} + +// postReading posts a reading to Home Assistant +func (p *Processor) postReading(reading *Reading, state *ProcessingState) error { + // Skip posting if no config (single-frame test mode) + if p.config == nil { + logMessage(LogFile, Info, " ā“˜ Skipping HASS post (test mode)") + return nil + } + + spo2Err := postToHomeAssistant(p.config, "sensor.pulse_ox_spo2", reading.SpO2, "%", "SpO2") + hrErr := postToHomeAssistant(p.config, "sensor.pulse_ox_hr", reading.HR, "bpm", "Heart Rate") + + if spo2Err == nil && hrErr == nil { + state.SuccessCount++ + logMessage(LogFile, Info, " āœ“ Posted successfully (success: %d, fail: %d)", + state.SuccessCount, state.FailureCount) + return nil + } + + state.FailureCount++ + if spo2Err != nil { + logMessage(LogFile, Error, " āŒ SpO2 post error: %v", spo2Err) + } + if hrErr != nil { + logMessage(LogFile, Error, " āŒ HR post error: %v", hrErr) + } + logMessage(LogFile, Info, " (success: %d, fail: %d)", state.SuccessCount, state.FailureCount) + logMessage(Both, Error, " āŒ Post failed") + + if spo2Err != nil { + return spo2Err + } + return hrErr +} + +// saveThresholdedFrame saves the RAW thresholded frame +// Frame is ALREADY thresholded binary (from acquisition), just save it directly +// Useful for debugging and can be tested with ./pulseox-monitor raw_frames/thresh_*.png +func saveThresholdedFrame(frame gocv.Mat, frameNum int) { + filename := fmt.Sprintf("raw_frames/thresh_%s-%05d.png", time.Now().Format("20060102"), frameNum) + gocv.IMWrite(filename, frame) +} diff --git a/backups/backup_20251125/pulseox-monitor.go b/backups/backup_20251125/pulseox-monitor.go new file mode 100644 index 0000000..c68ae43 --- /dev/null +++ b/backups/backup_20251125/pulseox-monitor.go @@ -0,0 +1,551 @@ +package main + +import ( + "fmt" + "image" + "image/color" + "os" + "os/signal" + "syscall" + "time" + + "gocv.io/x/gocv" +) + +const VERSION = "v3.57" + +// Global debug flag +var DEBUG_MODE = false + 
+// Global timing flag +var TIMING_MODE = false + +// Global save crops flag +var SAVE_CROPS = false + +// Display and digit measurement constants +const ( + CUT_WIDTH = 280 + DIGIT_ONE_WIDTH = 72 + DIGIT_NON_ONE_WIDTH = 100 + MIN_BOX_HEIGHT = 110 +) + +func main() { + // Check for /help flag first + for _, arg := range os.Args[1:] { + if arg == "/help" || arg == "--help" || arg == "-h" { + showHelp() + return + } + } + + logMessage(Console, Info, "=== Pulse-Ox Monitor %s (Refactored Architecture) ===", VERSION) + logMessage(Console, Info, "") + + // Limit OpenCV thread pool to reduce CPU overhead + // Small images (280x200px) don't benefit from multi-threading + gocv.SetNumThreads(1) + logMessage(Console, Info, "šŸ”§ OpenCV threads limited to 1 (single-threaded for minimal overhead)") + logMessage(Console, Info, "") + + // Check for flags + for _, arg := range os.Args[1:] { + if arg == "/debug" { + DEBUG_MODE = true + logMessage(Console, Info, "šŸ› DEBUG MODE ENABLED") + } else if arg == "/timing" { + TIMING_MODE = true + logMessage(Console, Info, "ā±ļø TIMING MODE ENABLED") + } else if arg == "/crops" { + SAVE_CROPS = true + logMessage(Console, Info, "āœ‚ļø SAVE CROPS ENABLED (individual digit images)") + } + } + + // Check if running in single-frame test mode + if len(os.Args) >= 2 { + // Filter out flags + framePath := "" + for _, arg := range os.Args[1:] { + if arg != "/debug" && arg != "/timing" && arg != "/crops" { + framePath = arg + break + } + } + if framePath != "" { + runSingleFrameMode(framePath) + return + } + } + + // Normal streaming mode + runStreamingMode() +} + +func showHelp() { + logMessage(Console, Info, "=== Pulse-Ox Monitor %s ===", VERSION) + logMessage(Console, Info, "") + logMessage(Console, Info, "USAGE:") + logMessage(Console, Info, " pulseox-monitor [options] [framefile]") + logMessage(Console, Info, "") + logMessage(Console, Info, "OPTIONS:") + logMessage(Console, Info, " /help Show this help message") + logMessage(Console, Info, 
" /debug Enable debug mode (extra diagnostic output)") + logMessage(Console, Info, " /timing Show timing table for performance analysis") + logMessage(Console, Info, " /crops Save individual digit crop images (for approving templates)") + logMessage(Console, Info, "") + logMessage(Console, Info, "MODES:") + logMessage(Console, Info, " No arguments - Run in streaming mode (RTSP camera)") + logMessage(Console, Info, " [framefile] - Test mode: process single PNG file") + logMessage(Console, Info, "") + logMessage(Console, Info, "EXAMPLES:") + logMessage(Console, Info, " pulseox-monitor # Normal streaming") + logMessage(Console, Info, " pulseox-monitor /crops # Streaming with digit crops") + logMessage(Console, Info, " pulseox-monitor /timing # Show performance timing") + logMessage(Console, Info, " pulseox-monitor raw_frames/thresh_*.png # Test single frame") + logMessage(Console, Info, " pulseox-monitor /debug /crops # Multiple flags") + logMessage(Console, Info, "") + logMessage(Console, Info, "OUTPUT:") + logMessage(Console, Info, " review/ - Processed frame images and review.html") + logMessage(Console, Info, " raw_frames/ - Failed recognition frames (for debugging)") + logMessage(Console, Info, " test_output/ - Layout detection debug images") + logMessage(Console, Info, " pulse-monitor_*.log - Detailed execution log") + logMessage(Console, Info, "") +} + +func runSingleFrameMode(framePath string) { + logMessage(Console, Info, "=== Single Frame Test Mode ===") + logMessage(Console, Info, "Loading frame: %s", framePath) + logMessage(Console, Info, "") + + // Automatically enable DEBUG_MODE and SAVE_CROPS for file processing + DEBUG_MODE = true + SAVE_CROPS = true + logMessage(Console, Info, "šŸ› DEBUG MODE AUTO-ENABLED (file processing)") + logMessage(Console, Info, "āœ‚ļø SAVE CROPS AUTO-ENABLED (file processing)") + logMessage(Console, Info, "") + + // In test mode, all output goes to console only (no separate file log) + // Leave globalLogger as nil to avoid 
duplication when using Both target + globalLogger = nil + + // Setup + os.MkdirAll("test_output", 0755) + os.MkdirAll("review", 0755) + + // Load templates + logMessage(Console, Info, "Loading templates...") + templates, err := loadTemplates() + if err != nil { + logMessage(Console, Error, "āŒ Error loading templates: %v", err) + return + } + defer closeTemplates(templates) + logMessage(Console, Info, "āœ“ Templates loaded") + logMessage(Console, Info, "") + + // Create frame source + source := NewFileSource(framePath) + defer source.Close() + + // Create processor + processor := NewProcessor(templates, nil, nil) + state := NewProcessingState() + + // Process single frame with unified loop + processFrames(source, processor, state) + + logMessage(Console, Info, "") + logMessage(Console, Info, "āœ“ Single frame test complete") + logMessage(Console, Info, "") + logMessage(Console, Info, "=== OUTPUT FILES ===") + logMessage(Console, Info, "Test outputs:") + logMessage(Console, Info, " test_output/layout_boxes.jpg - Layout visualization") + logMessage(Console, Info, " review/f*_spo2_full.png - SpO2 recognition") + logMessage(Console, Info, " review/f*_hr_full.png - HR recognition") +} + +func runStreamingMode() { + // Create log file + logFilename := fmt.Sprintf("pulse-monitor_%s.log", time.Now().Format("20060102_150405")) + logFile, err := os.OpenFile(logFilename, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) + if err != nil { + logMessage(Console, Warning, "Warning: Could not create log file: %v", err) + logMessage(Console, Info, "Continuing without file logging...") + logMessage(Console, Info, "") + globalLogger = nil + } else { + defer logFile.Close() + globalLogger = logFile + logMessage(Both, Info, "šŸ“ Logging to: %s", logFilename) + } + + // Setup directories - clean output directories in streaming mode + logMessage(Console, Info, "šŸ—‘ļø Cleaning output directories...") + + logMessage(Console, Info, " - review/... 
āœ“") + os.RemoveAll("review") + + logMessage(Console, Info, " - raw_frames/... āœ“") + os.RemoveAll("raw_frames") + + logMessage(Console, Info, " - test_output/... āœ“") + os.RemoveAll("test_output") + + os.MkdirAll("review", 0755) + os.MkdirAll("raw_frames", 0755) + os.MkdirAll("test_output", 0755) + logMessage(Console, Info, "") + + // Initialize live review HTML + if err := initReviewHTML(); err != nil { + logMessage(Console, Warning, "Warning: Could not initialize review HTML: %v", err) + } else { + logMessage(Console, Info, "āœ… Live review HTML initialized: review/review.html (refresh browser to see updates)") + } + + // Load config + config, err := LoadConfig("config.yaml") + if err != nil { + logMessage(Console, Error, "Error loading config: %v", err) + return + } + + // Load templates + templates, err := loadTemplates() + if err != nil { + logMessage(Console, Error, "Error loading templates: %v", err) + return + } + defer closeTemplates(templates) + + // Initialize timestamp OCR client (reusable) + InitTimestampOCR() + defer CloseTimestampOCR() + + logMessage(Console, Info, "šŸ“Š All processed frames saved to review/") + logMessage(Console, Info, " Press Ctrl+C to stop and generate review.html") + logMessage(Console, Info, "") + + // Create RTSP source with reconnection handling + logMessage(Console, Info, "Connecting to RTSP stream...") + var source *RTSPSource + for { + source, err = NewRTSPSource(config.Camera.RTSPURL) + if err == nil { + break + } + logMessage(Console, Warning, "Failed to connect: %v", err) + logMessage(Console, Info, "Retrying in 5 seconds...") + time.Sleep(5 * time.Second) + } + defer source.Close() + + logMessage(Console, Info, "āœ“ Connected! 
Press Ctrl+C to stop") + logMessage(Console, Info, "Posting to HASS: %s", config.HomeAssistant.URL) + logMessage(Console, Info, "") + + // Setup signal handler + sigChan := make(chan os.Signal, 1) + signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM) + go func() { + <-sigChan + logMessage(Console, Info, "") + logMessage(Console, Info, "") + logMessage(Console, Info, "šŸ›‘ Received stop signal, finishing up...") + source.Close() // Safe to call multiple times now (sync.Once) + }() + + // Create processor + processor := NewProcessor(templates, config, logFile) + state := NewProcessingState() + + // Run unified processing loop + processFrames(source, processor, state) + + // Close review HTML + logMessage(Console, Info, "") + logMessage(Console, Info, "šŸ“ Closing review.html...") + if err := closeReviewHTML(); err != nil { + logMessage(Console, Error, "Error closing review HTML: %v", err) + } else { + logMessage(Console, Info, "āœ“ Review page completed: review/review.html (%d frames)", len(state.ReviewEntries)) + logMessage(Console, Info, " Open it in browser to review recognition results") + } +} + +// interruptibleSleep sleeps for the specified duration, but checks every second if source is closed +func interruptibleSleep(source FrameSource, duration time.Duration) { + remaining := duration + for remaining > 0 { + sleepTime := time.Second + if remaining < sleepTime { + sleepTime = remaining + } + time.Sleep(sleepTime) + remaining -= sleepTime + + // Check if source was closed (Ctrl+C pressed) + if !source.IsActive() { + return + } + } +} + +// processFrames is the UNIFIED processing loop - works for both RTSP and single-frame +func processFrames(source FrameSource, processor *Processor, state *ProcessingState) { + logMessage(Console, Info, "Processing frames from: %s", source.Name()) + logMessage(Console, Info, "") + + for { + // Initialize timing data + var timing TimingData + acquireStart := time.Now() + + // Read raw frame from source + var frame gocv.Mat + 
var shouldContinue bool + var err error + + if state.ConsecutiveFailures == 1 { + // First failure: grab IMMEDIATE next frame (bypass skip) + logMessage(Console, Debug, " Using NextImmediate() due to consecutive failure") + frame, shouldContinue, err = source.NextImmediate() + } else { + // Normal operation or 2nd+ failure: use normal skip + frame, shouldContinue, err = source.Next() + } + timing.Acquire = time.Since(acquireStart).Milliseconds() + + if err != nil { + logMessage(Both, Error, "Error reading frame: %v", err) + if !shouldContinue { + break + } + continue + } + if frame.Empty() { + if !shouldContinue { + break + } + continue + } + + // Check timestamp every 10 processed frames (BEFORE preprocessing, on colored frame) + state.ProcessedCount++ + state.TimestampCheckCounter++ + if state.TimestampCheckCounter >= 10 { + state.TimestampCheckCounter = 0 + diff, _, err := extractTimestamp(frame) + if err != nil { + logMessage(Both, Warning, " Timestamp OCR failed: %v", err) + } else if diff > 3 { + logMessage(Both, Warning, " Camera timestamp lag: %ds behind server", diff) + } else if diff < -3 { + logMessage(Both, Warning, " Camera timestamp ahead: %ds ahead of server", -diff) + } + // Silent if drift is within ±3 seconds + } + + // Save truly raw frame (before any preprocessing) - threshold to reduce file size + // Clone the colored frame, convert to grayscale, threshold at 240 + // This becomes the "raw" frame that will be saved on errors + rawFrameClone := frame.Clone() + rawGray := gocv.NewMat() + gocv.CvtColor(rawFrameClone, &rawGray, gocv.ColorBGRToGray) + rawThresholded := gocv.NewMat() + gocv.Threshold(rawGray, &rawThresholded, 240, 255, gocv.ThresholdBinary) + rawGray.Close() + rawFrameClone.Close() + // rawThresholded is now the true raw frame (portrait, with timestamp, thresholded) + // Will be passed to processFrame() for potential error saving + + // Preprocess COLORED frame (rotate, crop timestamp) + preprocessStart := time.Now() + 
logMessage(Console, Debug, " Before preprocess: %dx%d", frame.Cols(), frame.Rows()) + preprocessed := preprocessFrame(frame) + logMessage(Console, Debug, " After preprocess: %dx%d", preprocessed.Cols(), preprocessed.Rows()) + frame.Close() + timing.Preprocess = time.Since(preprocessStart).Milliseconds() + + // Calculate rotation angle on first frame (if not set) + if state.LockedRotation == 0.0 && !state.LayoutValid { + angle := calculateRotationAngle(preprocessed) + state.LockedRotation = angle + if angle != 0.0 { + logMessage(Both, Info, "šŸ”„ Rotation angle: %.2f° (will rotate all frames)", angle) + } else { + logMessage(Both, Info, "šŸ“ No rotation needed (angle < 0.5°)") + } + } + + // Apply rotation if needed (on colored preprocessed frame, BEFORE thresholding) + var rotated gocv.Mat + if state.LockedRotation != 0.0 { + rotateStart := time.Now() + rotated = rotateImage(preprocessed, state.LockedRotation) + preprocessed.Close() + logMessage(Both, Info, " āœ“ Applied rotation: %.3f° to frame", state.LockedRotation) + logMessage(LogFile, Debug, " [TIMING] Rotation: %dms", time.Since(rotateStart).Milliseconds()) + + // DEBUG: Save rotated frame with same visualization as BEFORE + if DEBUG_MODE && state.ProcessedCount <= 2 { + // Save plain rotated frame + gocv.IMWrite(fmt.Sprintf("test_output/rotated_frame_%d.png", state.ProcessedCount), rotated) + + // Create visualization matching BEFORE image + afterVis := rotated.Clone() + // Note: We can't draw the exact same boxes since they were calculated on pre-rotated frame + // But we can add a reference line to show the frame is rotated + gocv.PutText(&afterVis, + fmt.Sprintf("AFTER rotation: %.2f degrees", state.LockedRotation), + image.Pt(50, 50), + gocv.FontHersheyDuplex, 1.5, color.RGBA{0, 255, 0, 255}, 3) + gocv.IMWrite("test_output/rotation_boxes_AFTER.png", afterVis) + afterVis.Close() + + logMessage(Both, Info, " šŸ’¾ Saved rotated frame and AFTER visualization") + } + } else { + rotated = preprocessed + 
logMessage(Both, Info, " šŸ“ No rotation applied (angle=0)") + } + + // THRESHOLD AFTER PREPROCESSING AND ROTATION - SINGLE POINT OF TRUTH + // Convert rotated colored frame to grayscale then threshold at 240 + thresholdStart := time.Now() + gray := gocv.NewMat() + gocv.CvtColor(rotated, &gray, gocv.ColorBGRToGray) + thresholded := gocv.NewMat() + gocv.Threshold(gray, &thresholded, 240, 255, gocv.ThresholdBinary) + gray.Close() + rotated.Close() + timing.Threshold = time.Since(thresholdStart).Milliseconds() + + // Try to acquire layout if we don't have it + if !state.LayoutValid { + // Call layout detection directly + layout, scale, _ := detectScreenLayoutAreas(thresholded) + if layout != nil { + // Layout acquired successfully + state.Layout = layout + state.LockedScale = scale + state.LayoutValid = true + state.ConsecutiveFailures = 0 + + logMessage(LogFile, Info, " āœ“ Layout detected, scale factor: %.3f", scale) + logMessage(LogFile, Info, " SpO2 area: X=%d, Y=%d, Width=%d, Height=%d", + layout.SpO2Area.Min.X, layout.SpO2Area.Min.Y, + layout.SpO2Area.Dx(), layout.SpO2Area.Dy()) + logMessage(LogFile, Info, " HR area: X=%d, Y=%d, Width=%d, Height=%d", + layout.HRArea.Min.X, layout.HRArea.Min.Y, + layout.HRArea.Dx(), layout.HRArea.Dy()) + logMessage(Console, Info, "āœ“ Layout detected, scale: %.3f", scale) + } else { + // Layout acquisition failed - increment counter + state.ConsecutiveFailures++ + thresholded.Close() + rawThresholded.Close() + + // Escalation strategy + if state.ConsecutiveFailures == 1 { + // 1st failure: Skip this frame, try next immediately + logMessage(Both, Warning, " Layout detection failed (1st try) - trying next frame...") + } else if state.ConsecutiveFailures == 2 { + // 2nd failure: Try next frame immediately (different conditions?) 
+ logMessage(Both, Warning, " Layout detection failed (2nd try) - trying next frame...") + } else if state.ConsecutiveFailures == 3 { + // 3rd failure: Wait 10s + logMessage(Both, Warning, " Layout detection failed (3rd try) - waiting 10s...") + interruptibleSleep(source, 10*time.Second) + } else if state.ConsecutiveFailures == 4 { + // 4th failure: Wait 30s + logMessage(Both, Warning, " Layout detection failed (4th try) - waiting 30s...") + interruptibleSleep(source, 30*time.Second) + } else { + // 5th+ failure: Wait 60s (device likely off/removed) + logMessage(Both, Info, " ā³ Pulse oximeter not detected (day mode) - waiting 60s...") + interruptibleSleep(source, 60*time.Second) + } + + if !shouldContinue { + break + } + continue + } + } + + // Process frame with current layout + timing.FrameNum = state.ProcessedCount + result := processor.processFrame(thresholded, rawThresholded, state.ProcessedCount, state, &timing) + thresholded.Close() + rawThresholded.Close() + + // Print timing table (header every 20 frames) + state.TimingFrameCount++ + showHeader := (state.TimingFrameCount % 20) == 1 + printTimingTable(timing, showHeader) + + // DEBUG: Show result status + if state.ConsecutiveFailures > 0 { + logMessage(Console, Debug, " Frame result: %v", result.Status) + } + + // Handle result + switch result.Status { + case StatusSuccess: + // Reset failure counter on successful processing + state.ConsecutiveFailures = 0 + if result.ShouldPost { + hassStart := time.Now() + processor.postReading(&result.Reading, state) + timing.HASS = time.Since(hassStart).Milliseconds() + timing.Total += timing.HASS // Add HASS time to total + } + + case StatusCorrupted: + // Corruption is a recognition issue, not layout issue + // Count as a failure for escalation + state.ConsecutiveFailures++ + if state.ConsecutiveFailures == 2 { + // 2nd consecutive problem: invalidate layout for re-detection + logMessage(Both, Warning, " Re-detecting layout...") + state.LayoutValid = false + } + + 
case StatusLowConfidence: + // Low confidence is also a problem that needs escalation + state.ConsecutiveFailures++ + if state.ConsecutiveFailures == 1 { + logMessage(Both, Warning, " Low confidence - grabbing next frame immediately...") + // Next iteration will automatically use NextImmediate() due to counter = 1 + } else if state.ConsecutiveFailures == 2 { + // 2nd consecutive problem: invalidate layout for re-detection + logMessage(Both, Warning, " 2nd consecutive low confidence - re-detecting layout...") + state.LayoutValid = false + } + + case StatusNoChange: + // Values didn't change - this is normal, not a failure + // Don't increment failure counter + + case StatusUnstable: + // Reading held for validation - this is normal, not a failure + // Don't increment failure counter + } + + // Check if we should continue + if !shouldContinue { + break + } + } +} + +func closeTemplates(templates map[int][]gocv.Mat) { + for _, templateList := range templates { + for _, t := range templateList { + t.Close() + } + } +} diff --git a/backups/backup_20251125/test_rotation.go b/backups/backup_20251125/test_rotation.go new file mode 100644 index 0000000..2f675a0 --- /dev/null +++ b/backups/backup_20251125/test_rotation.go @@ -0,0 +1,223 @@ +package main + +import ( + "fmt" + "log" + "os" + + "gocv.io/x/gocv" +) + +// Global DEBUG flag +var DEBUG = false + +func main() { + // Parse command line args + if len(os.Args) > 1 && os.Args[1] == "/debug" { + DEBUG = true + fmt.Println("DEBUG mode enabled\n") + } + + // Hardcode RTSP URL + streamURL := "rtsp://tapohass:!!Helder06@192.168.2.183:554/stream1" + + fmt.Println("Rotation Detection System") + fmt.Println("========================") + fmt.Println("Processing frames...\n") + + // Open RTSP stream + stream, err := gocv.OpenVideoCapture(streamURL) + if err != nil { + log.Fatalf("Failed to open stream: %v", err) + } + defer stream.Close() + + // Initialize detection state + rotation := 9999.0 // Sentinel value for "not detected" 
+ width := 0 // 0 means "not detected" + scaleFactor := 0.0 // Scale factor from detection + + // Frame processing counters + framesSkipped := 0 + framesProcessed := 0 + detectionAttempts := 0 + totalFramesRead := 0 + SKIP_COUNT := 7 // Process every 8th frame + MAX_DETECTION_ATTEMPTS := 100 // Give up after 100 attempts + RE_DETECT_AFTER := 0 // Re-detect after N frames (0 = disabled) + + // Main processing loop + frame := gocv.NewMat() + defer frame.Close() + + for { + // Acquire a frame + if ok := stream.Read(&frame); !ok || frame.Empty() { + log.Fatal("Failed to read frame from stream") + } + totalFramesRead++ + + // Preprocess frame to binary + binary := PreprocessFrame(frame) + + // If rotation is 9999 or width is 0, detect bands/width/rotation + if rotation == 9999 || width == 0 || scaleFactor == 0.0 { + detectionAttempts++ + + // Give up if too many attempts + if detectionAttempts > MAX_DETECTION_ATTEMPTS { + fmt.Printf("āŒ Detection failed after %d attempts. Exiting.\n", MAX_DETECTION_ATTEMPTS) + break + } + + result := DetectRotationAndWidth(binary) + if result.Success { + rotation = result.Rotation + width = result.Width + scaleFactor = result.ScaleFactor + fmt.Printf("āœ“ DETECTION SUCCESSFUL (frame %d, attempt #%d):\n", totalFramesRead, detectionAttempts) + fmt.Printf(" Width=%dpx, Rotation=%.3f°, Scale=%.3f\n\n", width, rotation, scaleFactor) + detectionAttempts = 0 // Reset counter + } else { + // Show detection attempts with frame info + if detectionAttempts <= 5 { + if DEBUG { + fmt.Printf("Frame %d: Detection attempt #%d failed\n", totalFramesRead, detectionAttempts) + } + } else { + fmt.Printf("⚠ Frame %d: Detection attempt #%d failed - retrying...\n", totalFramesRead, detectionAttempts) + } + // Clean up and continue to next frame + binary.Close() + + // Skip a few frames to ensure display has changed + for skip := 0; skip < 3; skip++ { + if ok := stream.Read(&frame); !ok { + break + } + } + continue + } + } + + // If rotation != 9999 and width 
!= 0 and scaleFactor != 0.0 and we've skipped enough frames + if rotation != 9999 && width != 0 && scaleFactor != 0.0 && framesSkipped >= SKIP_COUNT { + framesProcessed++ + + // Apply the same rotation correction to this frame + // (rotation was detected on a previous frame, now apply it to this one) + rotated := RotateImage(binary, rotation) + defer rotated.Close() + + // Save rotated frame BEFORE scaling + if DEBUG && framesProcessed == 1 { + gocv.IMWrite("debug_frame_001_AFTER_ROTATION_BEFORE_SCALE.png", rotated) + fmt.Println("DEBUG: Saved rotated frame before scaling") + } + + // Apply the same scaling using detected scale factor + // This scales the entire frame so the baseline becomes 860px + scaled := ScaleByFactor(rotated, scaleFactor) + defer scaled.Close() + + fmt.Printf("Frame #%d processed: Rotated %.3f°, Scaled to %dx%d (factor: %.3f, baseline: %dpx -> 860px)\n", + framesProcessed, rotation, scaled.Cols(), scaled.Rows(), scaleFactor, width) + + // Re-detect bands on scaled image for visualization + if DEBUG && framesProcessed <= 5 { + // Use the detected scale factor for minimum height + scaledMinHeight := int(80.0 * scaleFactor) + bands := DetectBands(scaled, scaledMinHeight) + + if len(bands) >= 2 { + // Find tallest band (graph) + tallestIdx := 0 + tallestHeight := 0 + for i, band := range bands { + height := band.maxY - band.minY + if height > tallestHeight { + tallestHeight = height + tallestIdx = i + } + } + + if tallestIdx < len(bands)-1 { + graphBand := bands[tallestIdx] + digitBand := bands[tallestIdx+1] + + // Extract digit region + digitRegion := ExtractBandRegion(scaled, digitBand) + defer digitRegion.Close() + + // Find digit boxes + digitBoxes := FindDigitBoxes(digitRegion, scaledMinHeight) + + if DEBUG { + fmt.Printf(" DEBUG: FindDigitBoxes returned %d boxes:\n", len(digitBoxes)) + for i, box := range digitBoxes { + fmt.Printf(" Box %d: X[%d-%d] Y[%d-%d] Size: %dx%d\n", + i+1, box.Min.X, box.Max.X, box.Min.Y, box.Max.Y, + box.Dx(), 
box.Dy()) + } + } + + // Calculate center from scaled baseline points + // Use the same proportion as detected + scaledCenterX := int(float64(scaled.Cols()) / 2.0) + + // Merge into displays + displayAreas := mergeDigitBoxesWithCenter(digitRegion, digitBoxes, scaledCenterX) + + fmt.Println(" DEBUG: Saving visualizations...") + + // Visualize bands + VisualizeSimpleBands(scaled, graphBand, digitBand, + fmt.Sprintf("debug_frame_%03d_bands.png", framesProcessed)) + + // Save digit region + gocv.IMWrite(fmt.Sprintf("debug_frame_%03d_digits.png", framesProcessed), digitRegion) + + // Visualize digit boxes + VisualizeDigitBoxes(digitRegion, digitBoxes, digitBand.minY, + fmt.Sprintf("debug_frame_%03d_digitboxes.png", framesProcessed)) + + // Visualize merged displays + VisualizeDetectedDisplays(digitRegion, displayAreas, digitBoxes, scaledCenterX, + fmt.Sprintf("debug_frame_%03d_merged.png", framesProcessed)) + + fmt.Printf(" DEBUG: Saved 4 visualization files for frame %d\n", framesProcessed) + } + } + } + + // Save scaled frame if in DEBUG mode + if DEBUG && framesProcessed <= 5 { + filename := fmt.Sprintf("debug_frame_%03d_scaled.png", framesProcessed) + gocv.IMWrite(filename, scaled) + fmt.Printf("DEBUG: Saved %s\n", filename) + } + + // TODO: Add OCR processing on 'scaled' image here in later phase + // Display areas (SpO2 and HR) are already detected and stored in result + // They will be available when we integrate this with the main monitoring system + + framesSkipped = 0 + + // Optional: Force re-detection after N frames to adapt to changes + if RE_DETECT_AFTER > 0 && framesProcessed >= RE_DETECT_AFTER { + fmt.Printf("\nšŸ”„ Re-detecting after %d frames...\n", RE_DETECT_AFTER) + rotation = 9999 + width = 0 + scaleFactor = 0.0 + framesProcessed = 0 + } + } else { + framesSkipped++ + } + + // Clean up + binary.Close() + } + + fmt.Println("\nProgram terminated.") +} diff --git a/backups/backup_20251125/timestamp_ocr.go b/backups/backup_20251125/timestamp_ocr.go new 
file mode 100644 index 0000000..42c95c4 --- /dev/null +++ b/backups/backup_20251125/timestamp_ocr.go @@ -0,0 +1,109 @@ +package main + +import ( + "fmt" + "image" + "regexp" + "strconv" + "strings" + "time" + + "github.com/otiai10/gosseract/v2" + "gocv.io/x/gocv" +) + +// Global reusable OCR client +var timestampOCRClient *gosseract.Client + +// InitTimestampOCR initializes the reusable OCR client +func InitTimestampOCR() { + timestampOCRClient = gosseract.NewClient() + timestampOCRClient.SetPageSegMode(gosseract.PSM_SINGLE_LINE) + timestampOCRClient.SetWhitelist("0123456789-: ") +} + +// CloseTimestampOCR closes the OCR client +func CloseTimestampOCR() { + if timestampOCRClient != nil { + timestampOCRClient.Close() + } +} + +// extractTimestamp extracts and OCRs the timestamp from the top of the frame +// Returns: (secondsDifference, ocrDurationMs, error) +func extractTimestamp(frame gocv.Mat) (int, int64, error) { + startTime := time.Now() + + // Extract 1024x68 region + timestampRegion := frame.Region(image.Rect(0, 0, 1024, 68)) + defer timestampRegion.Close() + + // Downscale 50% (reduces OCR processing time) + downscaled := gocv.NewMat() + defer downscaled.Close() + gocv.Resize(timestampRegion, &downscaled, image.Point{512, 34}, 0, 0, gocv.InterpolationArea) + + // Grayscale + gray := gocv.NewMat() + defer gray.Close() + gocv.CvtColor(downscaled, &gray, gocv.ColorBGRToGray) + + // Binary threshold (Otsu automatically finds best threshold) + binary := gocv.NewMat() + defer binary.Close() + gocv.Threshold(gray, &binary, 0, 255, gocv.ThresholdBinary|gocv.ThresholdOtsu) + + // Invert (white text on black → black text on white for better OCR) + inverted := gocv.NewMat() + defer inverted.Close() + gocv.BitwiseNot(binary, &inverted) + + // Encode as PNG (lossless, better for text) + buf, err := gocv.IMEncode(".png", inverted) + if err != nil { + return 0, time.Since(startTime).Milliseconds(), fmt.Errorf("failed to encode: %w", err) + } + defer buf.Close() + + // OCR 
with REUSED client (major speed boost) + err = timestampOCRClient.SetImageFromBytes(buf.GetBytes()) + if err != nil { + return 0, time.Since(startTime).Milliseconds(), fmt.Errorf("failed to set image: %w", err) + } + + text, err := timestampOCRClient.Text() + if err != nil { + return 0, time.Since(startTime).Milliseconds(), fmt.Errorf("OCR failed: %w", err) + } + + // Parse timestamp + text = strings.TrimSpace(text) + text = strings.ReplaceAll(text, "\n", "") + + // Pattern: YYYY-MM-DD HH:MM:SS + re := regexp.MustCompile(`(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2}):(\d{2})`) + matches := re.FindStringSubmatch(text) + + if len(matches) != 7 { + return 0, time.Since(startTime).Milliseconds(), fmt.Errorf("could not parse timestamp from: '%s'", text) + } + + // Parse components + year, _ := strconv.Atoi(matches[1]) + month, _ := strconv.Atoi(matches[2]) + day, _ := strconv.Atoi(matches[3]) + hour, _ := strconv.Atoi(matches[4]) + minute, _ := strconv.Atoi(matches[5]) + second, _ := strconv.Atoi(matches[6]) + + // Create timestamp in local timezone + cameraTime := time.Date(year, time.Month(month), day, hour, minute, second, 0, time.Local) + serverTime := time.Now() + + // Calculate difference + diff := int(serverTime.Sub(cameraTime).Seconds()) + + ocrDuration := time.Since(startTime).Milliseconds() + + return diff, ocrDuration, nil +} diff --git a/backups/backup_20251125/training_digits/0_1.png b/backups/backup_20251125/training_digits/0_1.png new file mode 100644 index 0000000..fe3a182 Binary files /dev/null and b/backups/backup_20251125/training_digits/0_1.png differ diff --git a/backups/backup_20251125/training_digits/0_2.png b/backups/backup_20251125/training_digits/0_2.png new file mode 100644 index 0000000..0b1e716 Binary files /dev/null and b/backups/backup_20251125/training_digits/0_2.png differ diff --git a/backups/backup_20251125/training_digits/0_3.png b/backups/backup_20251125/training_digits/0_3.png new file mode 100644 index 0000000..677052d Binary files 
/dev/null and b/backups/backup_20251125/training_digits/0_3.png differ diff --git a/backups/backup_20251125/training_digits/1_1.png b/backups/backup_20251125/training_digits/1_1.png new file mode 100644 index 0000000..6298875 Binary files /dev/null and b/backups/backup_20251125/training_digits/1_1.png differ diff --git a/backups/backup_20251125/training_digits/1_2.png b/backups/backup_20251125/training_digits/1_2.png new file mode 100644 index 0000000..365d71d Binary files /dev/null and b/backups/backup_20251125/training_digits/1_2.png differ diff --git a/backups/backup_20251125/training_digits/1_3.png b/backups/backup_20251125/training_digits/1_3.png new file mode 100644 index 0000000..e4ad646 Binary files /dev/null and b/backups/backup_20251125/training_digits/1_3.png differ diff --git a/backups/backup_20251125/training_digits/1_4.png b/backups/backup_20251125/training_digits/1_4.png new file mode 100644 index 0000000..b34fbc5 Binary files /dev/null and b/backups/backup_20251125/training_digits/1_4.png differ diff --git a/backups/backup_20251125/training_digits/1_5.png b/backups/backup_20251125/training_digits/1_5.png new file mode 100644 index 0000000..2646400 Binary files /dev/null and b/backups/backup_20251125/training_digits/1_5.png differ diff --git a/backups/backup_20251125/training_digits/2_1.png b/backups/backup_20251125/training_digits/2_1.png new file mode 100644 index 0000000..1a2a81a Binary files /dev/null and b/backups/backup_20251125/training_digits/2_1.png differ diff --git a/backups/backup_20251125/training_digits/2_2.png b/backups/backup_20251125/training_digits/2_2.png new file mode 100644 index 0000000..21bbb98 Binary files /dev/null and b/backups/backup_20251125/training_digits/2_2.png differ diff --git a/backups/backup_20251125/training_digits/2_3.png b/backups/backup_20251125/training_digits/2_3.png new file mode 100644 index 0000000..1c3ca59 Binary files /dev/null and b/backups/backup_20251125/training_digits/2_3.png differ diff --git 
a/backups/backup_20251125/training_digits/2_4.png b/backups/backup_20251125/training_digits/2_4.png new file mode 100644 index 0000000..76f1052 Binary files /dev/null and b/backups/backup_20251125/training_digits/2_4.png differ diff --git a/backups/backup_20251125/training_digits/3_1.png b/backups/backup_20251125/training_digits/3_1.png new file mode 100644 index 0000000..bbd30a1 Binary files /dev/null and b/backups/backup_20251125/training_digits/3_1.png differ diff --git a/backups/backup_20251125/training_digits/3_2.png b/backups/backup_20251125/training_digits/3_2.png new file mode 100644 index 0000000..a0612fa Binary files /dev/null and b/backups/backup_20251125/training_digits/3_2.png differ diff --git a/backups/backup_20251125/training_digits/3_3.png b/backups/backup_20251125/training_digits/3_3.png new file mode 100644 index 0000000..a5bfb55 Binary files /dev/null and b/backups/backup_20251125/training_digits/3_3.png differ diff --git a/backups/backup_20251125/training_digits/4_1.png b/backups/backup_20251125/training_digits/4_1.png new file mode 100644 index 0000000..b4a446d Binary files /dev/null and b/backups/backup_20251125/training_digits/4_1.png differ diff --git a/backups/backup_20251125/training_digits/5_1.png b/backups/backup_20251125/training_digits/5_1.png new file mode 100644 index 0000000..2c49e59 Binary files /dev/null and b/backups/backup_20251125/training_digits/5_1.png differ diff --git a/backups/backup_20251125/training_digits/5_2.png b/backups/backup_20251125/training_digits/5_2.png new file mode 100644 index 0000000..cd430e9 Binary files /dev/null and b/backups/backup_20251125/training_digits/5_2.png differ diff --git a/backups/backup_20251125/training_digits/5_3.png b/backups/backup_20251125/training_digits/5_3.png new file mode 100644 index 0000000..3d32aee Binary files /dev/null and b/backups/backup_20251125/training_digits/5_3.png differ diff --git a/backups/backup_20251125/training_digits/6_1.png 
b/backups/backup_20251125/training_digits/6_1.png new file mode 100644 index 0000000..39f08ef Binary files /dev/null and b/backups/backup_20251125/training_digits/6_1.png differ diff --git a/backups/backup_20251125/training_digits/6_2.png b/backups/backup_20251125/training_digits/6_2.png new file mode 100644 index 0000000..f3ac1e1 Binary files /dev/null and b/backups/backup_20251125/training_digits/6_2.png differ diff --git a/backups/backup_20251125/training_digits/6_3.png b/backups/backup_20251125/training_digits/6_3.png new file mode 100644 index 0000000..7615701 Binary files /dev/null and b/backups/backup_20251125/training_digits/6_3.png differ diff --git a/backups/backup_20251125/training_digits/6_4.png b/backups/backup_20251125/training_digits/6_4.png new file mode 100644 index 0000000..f5e1675 Binary files /dev/null and b/backups/backup_20251125/training_digits/6_4.png differ diff --git a/backups/backup_20251125/training_digits/6_5.png b/backups/backup_20251125/training_digits/6_5.png new file mode 100644 index 0000000..326fe52 Binary files /dev/null and b/backups/backup_20251125/training_digits/6_5.png differ diff --git a/backups/backup_20251125/training_digits/7_1.png b/backups/backup_20251125/training_digits/7_1.png new file mode 100644 index 0000000..736ca35 Binary files /dev/null and b/backups/backup_20251125/training_digits/7_1.png differ diff --git a/backups/backup_20251125/training_digits/7_2.png b/backups/backup_20251125/training_digits/7_2.png new file mode 100644 index 0000000..af58c55 Binary files /dev/null and b/backups/backup_20251125/training_digits/7_2.png differ diff --git a/backups/backup_20251125/training_digits/7_3.png b/backups/backup_20251125/training_digits/7_3.png new file mode 100644 index 0000000..04fff8a Binary files /dev/null and b/backups/backup_20251125/training_digits/7_3.png differ diff --git a/backups/backup_20251125/training_digits/7_4.png b/backups/backup_20251125/training_digits/7_4.png new file mode 100644 index 
0000000..4e8e5c5 Binary files /dev/null and b/backups/backup_20251125/training_digits/7_4.png differ diff --git a/backups/backup_20251125/training_digits/7_5.png b/backups/backup_20251125/training_digits/7_5.png new file mode 100644 index 0000000..809ca6b Binary files /dev/null and b/backups/backup_20251125/training_digits/7_5.png differ diff --git a/backups/backup_20251125/training_digits/7_6.png b/backups/backup_20251125/training_digits/7_6.png new file mode 100644 index 0000000..03de1b2 Binary files /dev/null and b/backups/backup_20251125/training_digits/7_6.png differ diff --git a/backups/backup_20251125/training_digits/7_7.png b/backups/backup_20251125/training_digits/7_7.png new file mode 100644 index 0000000..0e84826 Binary files /dev/null and b/backups/backup_20251125/training_digits/7_7.png differ diff --git a/backups/backup_20251125/training_digits/7_8.png b/backups/backup_20251125/training_digits/7_8.png new file mode 100644 index 0000000..9451d4b Binary files /dev/null and b/backups/backup_20251125/training_digits/7_8.png differ diff --git a/backups/backup_20251125/training_digits/8_1.png b/backups/backup_20251125/training_digits/8_1.png new file mode 100644 index 0000000..287d19f Binary files /dev/null and b/backups/backup_20251125/training_digits/8_1.png differ diff --git a/backups/backup_20251125/training_digits/8_2.png b/backups/backup_20251125/training_digits/8_2.png new file mode 100644 index 0000000..66fa3eb Binary files /dev/null and b/backups/backup_20251125/training_digits/8_2.png differ diff --git a/backups/backup_20251125/training_digits/8_3.png b/backups/backup_20251125/training_digits/8_3.png new file mode 100644 index 0000000..890e777 Binary files /dev/null and b/backups/backup_20251125/training_digits/8_3.png differ diff --git a/backups/backup_20251125/training_digits/8_4.png b/backups/backup_20251125/training_digits/8_4.png new file mode 100644 index 0000000..8d387e5 Binary files /dev/null and 
b/backups/backup_20251125/training_digits/8_4.png differ diff --git a/backups/backup_20251125/training_digits/9_1.png b/backups/backup_20251125/training_digits/9_1.png new file mode 100644 index 0000000..5f7adfb Binary files /dev/null and b/backups/backup_20251125/training_digits/9_1.png differ diff --git a/backups/backup_20251125/training_digits/9_2.png b/backups/backup_20251125/training_digits/9_2.png new file mode 100644 index 0000000..0d78ce8 Binary files /dev/null and b/backups/backup_20251125/training_digits/9_2.png differ diff --git a/backups/backup_20251125/training_digits/9_3.png b/backups/backup_20251125/training_digits/9_3.png new file mode 100644 index 0000000..bc10982 Binary files /dev/null and b/backups/backup_20251125/training_digits/9_3.png differ diff --git a/backups/backup_20251125/training_digits/9_4.png b/backups/backup_20251125/training_digits/9_4.png new file mode 100644 index 0000000..7d2c8a1 Binary files /dev/null and b/backups/backup_20251125/training_digits/9_4.png differ diff --git a/backups/backup_20251125/training_digits/9_5.png b/backups/backup_20251125/training_digits/9_5.png new file mode 100644 index 0000000..d4b410f Binary files /dev/null and b/backups/backup_20251125/training_digits/9_5.png differ diff --git a/backups/backup_20251125/training_digits/9_6.png b/backups/backup_20251125/training_digits/9_6.png new file mode 100644 index 0000000..e7b7614 Binary files /dev/null and b/backups/backup_20251125/training_digits/9_6.png differ diff --git a/backups/backup_20251125/training_digits/invalid/invalid_1.png b/backups/backup_20251125/training_digits/invalid/invalid_1.png new file mode 100644 index 0000000..4d2eeed Binary files /dev/null and b/backups/backup_20251125/training_digits/invalid/invalid_1.png differ diff --git a/backups/backup_20251125/training_digits/invalid/invalid_2.png b/backups/backup_20251125/training_digits/invalid/invalid_2.png new file mode 100644 index 0000000..18e13e4 Binary files /dev/null and 
package main

import (
	"gocv.io/x/gocv"
)

// ProcessingStatus represents the outcome of processing a frame.
type ProcessingStatus int

const (
	StatusSuccess       ProcessingStatus = iota // frame recognized with acceptable confidence
	StatusCorrupted                             // at least one digit matched an invalid template (-1)
	StatusLowConfidence                         // average OCR confidence below the acceptance threshold
	StatusLayoutInvalid                         // display layout could not be located in the frame
	StatusNoChange                              // values identical to the last posted reading
	StatusUnstable                              // large delta held back for hindsight validation
)

// Reading holds SpO2 and HR values with per-digit confidence scores.
// A digit value of -1 marks a corrupted (invalid-template) recognition.
// "Left"/"Right" refer to the two rightmost digit positions; an optional
// leading '1' (for 3-digit values) is handled by the recognizer itself.
type Reading struct {
	SpO2           int     // assembled SpO2 value
	SpO2LeftDigit  int     // SpO2 tens digit (-1 = corrupted)
	SpO2LeftConf   float64 // template-match confidence for the SpO2 tens digit (percent)
	SpO2RightDigit int     // SpO2 ones digit (-1 = corrupted)
	SpO2RightConf  float64 // template-match confidence for the SpO2 ones digit (percent)
	HR             int     // assembled heart-rate value
	HRLeftDigit    int     // HR tens digit (-1 = corrupted)
	HRLeftConf     float64 // template-match confidence for the HR tens digit (percent)
	HRRightDigit   int     // HR ones digit (-1 = corrupted)
	HRRightConf    float64 // template-match confidence for the HR ones digit (percent)
	Timestamp      string  // wall-clock time the reading was taken
	FrameNum       int     // source frame number
}

// AvgConfidence returns the average per-value confidence for SpO2 and HR,
// each computed as the mean of its two digit confidences.
func (r *Reading) AvgConfidence() (spo2Avg, hrAvg float64) {
	spo2Avg = (r.SpO2LeftConf + r.SpO2RightConf) / 2.0
	hrAvg = (r.HRLeftConf + r.HRRightConf) / 2.0
	return
}

// IsCorrupted checks if any digit is marked as invalid (-1),
// i.e. it matched one of the "invalid" templates instead of a real digit.
func (r *Reading) IsCorrupted() bool {
	return r.SpO2LeftDigit == -1 || r.SpO2RightDigit == -1 ||
		r.HRLeftDigit == -1 || r.HRRightDigit == -1
}

// GetCorruptionDetails returns a comma-separated description of which
// digit position(s) are corrupted, or "unknown" if none are flagged.
// ("digit 2"/"digit 3" are the tens/ones positions; digit 1 is the
// optional leading '1' — presumably never flagged here; confirm.)
func (r *Reading) GetCorruptionDetails() string {
	corrupted := []string{}
	if r.SpO2LeftDigit == -1 {
		corrupted = append(corrupted, "SpO2 digit 2")
	}
	if r.SpO2RightDigit == -1 {
		corrupted = append(corrupted, "SpO2 digit 3")
	}
	if r.HRLeftDigit == -1 {
		corrupted = append(corrupted, "HR digit 2")
	}
	if r.HRRightDigit == -1 {
		corrupted = append(corrupted, "HR digit 3")
	}
	if len(corrupted) == 0 {
		return "unknown"
	}
	result := ""
	for i, c := range corrupted {
		if i > 0 {
			result += ", "
		}
		result += c
	}
	return result
}

// ProcessingResult contains the outcome of processing a single frame.
type ProcessingResult struct {
	Status         ProcessingStatus // what happened to this frame
	Reading        Reading          // the recognized values (meaningful on success)
	ShouldPost     bool             // true when the reading should go to Home Assistant
	UnstableReason string           // human-readable reason when held for stability
}

// ProcessingState tracks all state across frame processing.
type ProcessingState struct {
	// Layout detection
	Layout         *ScreenLayout
	LayoutValid    bool
	LockedScale    float64
	LockedRotation float64 // Rotation angle in degrees (0 = no rotation)

	// Change detection
	LastPosted Reading // last reading actually sent to Home Assistant

	// Stability tracking (hindsight validation of large deltas)
	PendingReading  *Reading // reading held back awaiting confirmation (nil = none)
	LastReading     Reading  // most recent accepted reading
	HasLastReading  bool     // false until the very first reading is accepted
	BaselineReading Reading  // value before a suspected spike, used to detect glitches

	// Retry state
	InRetry             bool
	RetryCount          int
	MaxRetries          int
	ConsecutiveFailures int // Track consecutive layout/processing failures

	// Statistics
	FrameCount            int
	ProcessedCount        int
	SuccessCount          int
	FailureCount          int
	LowConfidenceCount    int
	TimestampCheckCounter int // Check timestamp every 10 processed frames

	// Review entries for the shutdown HTML report
	ReviewEntries []ReviewEntry

	// Timing tracking
	TimingFrameCount int // Count frames for periodic header
}

// NewProcessingState creates the initial state. Sentinel value -1 for
// SpO2/HR marks "no reading yet" so the first real reading always posts.
func NewProcessingState() *ProcessingState {
	return &ProcessingState{
		LayoutValid:     false,
		LockedScale:     1.0,
		LockedRotation:  0.0,
		MaxRetries:      1,
		LastPosted:      Reading{SpO2: -1, HR: -1},
		LastReading:     Reading{SpO2: -1, HR: -1},
		BaselineReading: Reading{SpO2: -1, HR: -1},
	}
}

// Action represents what the processing loop should do next.
type Action int

const (
	ActionContinue Action = iota // Continue to next frame
	ActionRetry                  // Retry with next frame + layout re-detection
	ActionPost                   // Post reading to Home Assistant
	ActionHold                   // Hold reading for validation
	ActionDiscard                // Discard reading
)

// FrameData represents a preprocessed frame ready for processing.
type FrameData struct {
	Original   gocv.Mat // raw captured frame
	Normalized gocv.Mat // cropped/rotated/scaled frame ready for OCR
	FrameNum   int
	Timestamp  string
}

// TimingData holds timing measurements for a single frame.
type TimingData struct {
	FrameNum   int
	Acquire    int64 // Frame acquisition (ms)
	Threshold  int64 // Grayscale + threshold (ms)
	Preprocess int64 // Crop + rotate (ms)
	Scale      int64 // Frame scaling (ms)
	OCR_SpO2   int64 // SpO2 recognition (ms)
	OCR_HR     int64 // HR recognition (ms)
	Validation int64 // Validation checks (ms)
	FileIO     int64 // Saving review images (ms)
	HASS       int64 // Home Assistant POST (ms)
	Total      int64 // Total processing time (ms)
}
package main

import (
	"fmt"
	"io"
	"time"
)

// Validator functions handle the three-tier exception system:
// 1) confirmed corruption, 2) low OCR confidence with retry,
// 3) large-delta hindsight validation. They run in sequence per frame.

// validateCorruption checks for corrupted digits (Handler #1).
// A digit of -1 means the recognizer matched an "invalid" template
// (white blob, UI artifact) rather than a real digit.
//
// NOTE(review): both branches return ActionContinue — this handler only
// logs; the caller presumably skips the frame via Reading.IsCorrupted.
// The logger parameter is unused (logging goes through the package-level
// logMessage/LogFile) — confirm whether it can be dropped.
func validateCorruption(reading *Reading, logger io.Writer) (Action, string) {
	if reading.IsCorrupted() {
		logMessage(LogFile, Info, " [CORRUPTION] Invalid digit detected: SpO2(%d,%d) HR(%d,%d) - skipping frame",
			reading.SpO2LeftDigit, reading.SpO2RightDigit,
			reading.HRLeftDigit, reading.HRRightDigit)
		return ActionContinue, ""
	}
	return ActionContinue, ""
}

// validateConfidence checks OCR confidence (Handler #2).
// Average confidence >= 85% for both values passes through. The first
// low-confidence frame triggers a single retry with layout re-detection
// (recovers from camera movement, blur, or lighting changes); a second
// consecutive failure gives up and discards the reading.
func validateConfidence(reading *Reading, state *ProcessingState, logger io.Writer) (Action, string) {
	spo2Avg, hrAvg := reading.AvgConfidence()

	// High confidence - pass through
	if spo2Avg >= 85 && hrAvg >= 85 {
		state.InRetry = false // Clear retry flag
		return ActionContinue, ""
	}

	// Low confidence
	if !state.InRetry {
		// First low confidence - trigger retry
		state.InRetry = true
		logMessage(LogFile, Info, " [LOW CONFIDENCE] SpO2: %.1f%%, HR: %.1f%% - retrying with layout re-detection",
			spo2Avg, hrAvg)
		return ActionRetry, ""
	}

	// Second consecutive low confidence - give up
	state.InRetry = false
	logMessage(LogFile, Info, " [LOW CONFIDENCE] Still low after retry (SpO2: %.1f%%, HR: %.1f%%) - pausing",
		spo2Avg, hrAvg)
	return ActionDiscard, ""
}

// validateStability checks for large deltas with hindsight validation
// (Handler #3). Readings that jump more than 3 points from the last
// accepted reading are held as "pending" rather than posted; the NEXT
// reading decides whether the jump was a real trend (post it) or a
// recognition glitch (discard it). Physiological changes are gradual,
// so sudden spikes are usually artifacts.
func validateStability(reading *Reading, state *ProcessingState, logger io.Writer) (Action, string) {
	// First reading - just accept it
	if !state.HasLastReading {
		state.LastReading = *reading
		state.HasLastReading = true
		state.BaselineReading = *reading
		return ActionPost, ""
	}

	// Calculate deltas from last reading
	spo2Delta := abs(reading.SpO2 - state.LastReading.SpO2)
	hrDelta := abs(reading.HR - state.LastReading.HR)

	// Stable (Ī” <= 3) - check if we have pending
	if spo2Delta <= 3 && hrDelta <= 3 {
		if state.PendingReading != nil {
			// A pending spike exists. Decide in hindsight: is the current
			// reading closer to the pre-spike baseline (spike was a glitch)
			// or closer to the pending value (spike was a real trend)?
			spo2ToBaseline := abs(reading.SpO2 - state.BaselineReading.SpO2)
			hrToBaseline := abs(reading.HR - state.BaselineReading.HR)
			spo2ToPending := abs(reading.SpO2 - state.PendingReading.SpO2)
			hrToPending := abs(reading.HR - state.PendingReading.HR)

			if spo2ToBaseline <= spo2ToPending && hrToBaseline <= hrToPending {
				// Closer to baseline - pending was a glitch
				logMessage(LogFile, Info, " [STABILITY] Discarding glitch: baseline(%d,%d) -> pending(%d,%d) -> current(%d,%d)",
					state.BaselineReading.SpO2, state.BaselineReading.HR,
					state.PendingReading.SpO2, state.PendingReading.HR,
					reading.SpO2, reading.HR)
				state.PendingReading = nil
			} else {
				// Closer to pending - real trend, post pending first
				logMessage(LogFile, Info, " [STABILITY] Confirming trend: baseline(%d,%d) -> pending(%d,%d) -> current(%d,%d)",
					state.BaselineReading.SpO2, state.BaselineReading.HR,
					state.PendingReading.SpO2, state.PendingReading.HR,
					reading.SpO2, reading.HR)
				// Will be handled by caller to post pending first
				state.PendingReading = nil
			}
		}

		// Accept current reading
		state.LastReading = *reading
		state.BaselineReading = *reading
		return ActionPost, ""
	}

	// Large delta (Ī” > 3) - check if we have pending
	if state.PendingReading != nil {
		// Another big jump while one is already pending: compare directions.
		spo2Direction := reading.SpO2 - state.PendingReading.SpO2
		hrDirection := reading.HR - state.PendingReading.HR
		pendingSpo2Diff := state.PendingReading.SpO2 - state.LastReading.SpO2
		pendingHrDiff := state.PendingReading.HR - state.LastReading.HR

		// Same direction if signs match
		spo2SameDir := (spo2Direction > 0 && pendingSpo2Diff > 0) || (spo2Direction < 0 && pendingSpo2Diff < 0)
		hrSameDir := (hrDirection > 0 && pendingHrDiff > 0) || (hrDirection < 0 && pendingHrDiff < 0)

		if spo2SameDir && hrSameDir {
			// Trend confirmed - post pending, then current becomes new pending
			logMessage(LogFile, Info, " [STABILITY] Trend confirmed: %d->%d->%d (SpO2), %d->%d->%d (HR)",
				state.LastReading.SpO2, state.PendingReading.SpO2, reading.SpO2,
				state.LastReading.HR, state.PendingReading.HR, reading.HR)
			// Post pending (caller handles), then hold current
			oldPending := state.PendingReading
			state.PendingReading = reading
			state.LastReading = *reading
			// Return special signal that pending should be posted
			_ = oldPending // Will be handled by caller
			return ActionHold, ""
		} else {
			// Opposite directions - discard pending as glitch
			logMessage(LogFile, Info, " [STABILITY] Direction mismatch: %d->%d->%d (SpO2), %d->%d->%d (HR)",
				state.LastReading.SpO2, state.PendingReading.SpO2, reading.SpO2,
				state.LastReading.HR, state.PendingReading.HR, reading.HR)
			state.PendingReading = nil
			state.LastReading = *reading
			return ActionPost, ""
		}
	}

	// No pending yet - hold this unstable reading and remember the
	// pre-spike value as baseline for the hindsight comparison above.
	logMessage(LogFile, Info, " [STABILITY] Holding unstable reading: SpO2 Ī”%d, HR Ī”%d", spo2Delta, hrDelta)
	state.BaselineReading = state.LastReading
	state.PendingReading = reading
	state.LastReading = *reading
	reason := fmt.Sprintf("[%s] Unstable (SpO2 Ī”%d, HR Ī”%d)", time.Now().Format("15:04:05.000"), spo2Delta, hrDelta)
	return ActionHold, reason
}

// abs returns the absolute value of an int.
func abs(x int) int {
	if x < 0 {
		return -x
	}
	return x
}
in pulseox-monitor.go and this document with every build! + +**For AI Assistant:** You MUST increment the version number in both pulseox-monitor.go and this document whenever you make ANY code changes. This is not optional - it's a critical part of the workflow. + +--- + +## šŸ”§ For New Chat Sessions: Data Access + +**You have full filesystem access to `/Users/johanjongsma/pulse-monitor/`** + +Use these tools to work with the project: +- `Filesystem:read_file` - Read source code, logs, config files +- `Filesystem:write_file` - Create new files +- `Filesystem:edit_file` - Modify existing code (preferred for edits) +- `Filesystem:list_directory` - See what files exist +- `Filesystem:move_file` - Move/rename files (e.g., approving training digits) +- `Filesystem:search_files` - Find files by pattern + +**Key files you'll need:** +- Source: `pulseox-monitor.go`, `ocr.go`, `layout_detection.go`, etc. +- Config: `config.yaml` +- Training: `training_digits/*.png`, `training_digits/invalid/*.png` +- Output: `review/*.png`, `review/review.html` +- Logs: `pulseox-monitor_*.log` + +**DO NOT waste time asking how to access files - just use the filesystem tools directly!** + +--- + +## Project Overview + +This system monitors a Masimo Rad-G pulse oximeter by reading its display via RTSP camera feed. It uses computer vision (OpenCV/gocv) to perform OCR on the SpO2 and heart rate values, then posts readings to Home Assistant for tracking and alerting. + +### Why This Exists +The Masimo pulse oximeter doesn't have a data output port, so we use a camera pointed at its display. The display has significant overexposure and light leakage issues, making traditional OCR (Tesseract) unreliable. We developed a custom template-matching approach that handles these challenging conditions. + +--- + +## Architecture + +### Main Components + +1. 
**pulseox-monitor.go** - Main application (v3.10) + - Unified frame processing loop + - Interruptible sleep for graceful shutdown + - Smart escalation strategy + - Signal handling (Ctrl+C) + +2. **processor.go** - Frame processor + - OCR execution + - Result validation + - Home Assistant posting + +3. **validators.go** - Validation handlers + - Corruption detection + - Confidence checking + - Stability validation (hindsight) + +4. **ocr.go** - Recognition engine + - Template matching for digits 0-9 + - Invalid pattern detection (corruption) + - `hasOneAt()` function for detecting narrow '1' digits + - Dynamic width calculation for 2-digit vs 3-digit displays + +5. **layout_detection.go** - Display locator + - Finds SpO2 and HR display areas in frame + - Position-based detection (rightmost edge method) + - Handles digit fragmentation + +6. **normalize.go** - Frame normalization + - Screen width detection + - Scaling to 860px width + - Layout detection with normalization + +7. **frame_source.go** - Frame sources + - RTSP streaming source + - Single-file test source + - Interruptible via `IsActive()` method + +8. **types.go** - Shared data structures + - ProcessingState with ConsecutiveFailures counter + - Reading, ProcessingResult types + - Status and Action enums + +9. **homeassistant.go** - Data posting + - REST API integration + - Posts only when values change + - Separate sensors: `sensor.pulse_ox_spo2`, `sensor.pulse_ox_hr` + +10. **html_report.go** - Review interface + - Dark-themed HTML generated on shutdown + - Shows all processed frames with confidence scores + - Visual indicators for unstable readings + +11. 
**timestamp_ocr.go** - Timestamp validation + - Extracts camera timestamp from frame + - Uses Tesseract OCR + - Validates against server time + - Warns if lag >10 seconds + +### Data Flow + +``` +RTSP Stream / File → Frame Capture (every 4th frame) → +Timestamp validation (every 10th frame) on **COLORED** frame → +**Clone frame for error saving** (grayscale → threshold at 240 → save as rawThresholded) → +Preprocessing (crop 68px top, rotate 90° CW) on **COLORED** frame → +**THRESHOLD ONCE at 240** (grayscale → binary) AFTER preprocessing → +Layout detection (find contours on binary) → +Display area extraction (280x200px binary regions) → +Template matching OCR on binary → +Digit validation (8 & 0) → +Exception handling (saves rawThresholded on errors) → +Home Assistant posting +``` + +--- + +## Recognition System + +### Template Matching Approach + +We use a library of digit images extracted from the actual pulse oximeter display. Each digit (0-9) can have multiple template variants to handle slight variations. + +**Current templates:** +- Digits 0-9: Multiple variants per digit +- Invalid patterns: 2 templates in `training_digits/invalid/` + - `white_blob_1.png` + - `invalid_2.png` + +**Template location:** `/Users/johanjongsma/pulse-monitor/training_digits/` + +### Digit Width Detection + +The system handles both 2-digit (XX) and 3-digit (1XX) displays dynamically: + +- **Narrow '1' detection:** `hasOneAt(x)` checks if column X is 80%+ white and column X+10 is 80%+ black +- **Check positions:** + - Digit 3 (rightmost): `w-8` pixels from right edge + - Digit 2: Depends on digit 3 width (72px for '1', 100px otherwise) + - Digit 1: Depends on both digit 2 and 3 widths + +**Why -8 offset for digit 3?** +Originally used -5, but testing showed '1' digits needed detection slightly more to the left to catch the right edge reliably. 
+ +### Display Positioning + +- **SpO2 and HR displays:** 234 pixels apart vertically +- **Layout detection:** Run once at startup, locked unless re-detection triggered +- **Frame processing:** Every 4th frame (~3.75 fps from 15fps stream) +- **Timestamp check:** Every 10th processed frame (~2.5 seconds) + +--- + +## Exception Handling System + +The system has three independent exception handlers that run in sequence: + +### 1. Confirmed Corruption (Priority: Highest) +**Trigger:** Any digit recognized as `-1` (matched invalid template with >70% confidence) + +**Behavior:** +- Log to file (silent to console) +- Skip corrupted frame +- Continue to next frame through normal processing +- No special retry, no layout re-detection + +**Why this approach:** +Corruption is a recognition failure, not a camera/lighting issue. The next frame will naturally be clean. + +**Invalid templates:** +Stored in `training_digits/invalid/` - patterns like white blobs, UI elements, or corrupted digits that should never be interpreted as valid numbers. + +### 2. Low OCR Confidence +**Trigger:** Average confidence < 85% for SpO2 or HR, OR negative values (unrecognized digits) + +**Behavior:** +1. **First occurrence:** + - Log "āš ļø Low confidence, retrying with next frame..." or "[UNRECOGNIZED] SpO2=X, HR=Y" + - Read next frame immediately + - Re-detect layout (handles camera movement/vibration) + - Process retry frame + - If successful (high confidence + values changed): perform stability check and post + +2. **Second consecutive low confidence:** + - Log "āš ļø Low confidence after retry, pausing 2 seconds" + - Give up, wait 2 seconds before continuing + +**Why re-detect layout:** +Low confidence or unrecognized digits often indicate camera movement, lighting changes, focus issues, or temporary blur. Re-detecting layout helps recover from these transient problems. + +### 3. 
Large Delta (Hindsight Validation) +**Trigger:** Values changed >3 points from last stable reading (e.g., SpO2: 95→92 or HR: 70→75) + +**Behavior:** +1. Hold new reading as "pending" (don't post yet) +2. Store baseline (value before the spike) +3. Wait for next reading +4. **If next reading confirms trend** (moves in same direction): + - Post the held reading + - Post the confirming reading +5. **If next reading contradicts** (moves opposite direction or returns to baseline): + - Discard held reading as glitch + - Post the new reading + +**Why hindsight validation:** +Physiological changes are gradual. Sudden spikes >3 points are usually recognition glitches or transient display artifacts. This prevents false alarms while allowing real trends. + +**Example scenario:** +``` +Baseline: SpO2=95 +Reading 1: SpO2=91 (Ī”4) → HOLD, don't post +Reading 2: SpO2=89 (Ī”2 from held) → Trend confirmed, post 91 then 89 +``` + +vs. + +``` +Baseline: SpO2=95 +Reading 1: SpO2=91 (Ī”4) → HOLD, don't post +Reading 2: SpO2=95 (back to baseline) → Discard 91 as glitch, post 95 +``` + +--- + +## Key Design Decisions + +### 1. Change-Based Posting Only +**Decision:** Only post when SpO2 or HR values actually change. + +**Rationale:** +- Reduces Home Assistant database bloat +- Avoids unnecessary network traffic +- Still captures all meaningful changes + +### 2. Template Matching Over Tesseract OCR +**Decision:** Use custom template matching instead of Tesseract. + +**Rationale:** +- Pulse oximeter display has severe overexposure +- Tesseract unreliable with bright backgrounds +- Template matching: 90%+ accuracy vs. Tesseract: ~60% +- We have stable digit shapes - perfect for template matching + +### 3. Fixed Layout After Startup +**Decision:** Detect layout once, lock it, only re-detect on low confidence retry. 
+ +**Rationale:** +- Camera is stationary (mounted on tripod) +- Display position doesn't change +- Layout detection is expensive (~10-20ms) +- Re-detection only needed if camera moves/vibrates + +### 4. No Immediate Corruption Retry +**Decision:** Don't immediately retry after detecting corruption; just skip to next frame. + +**Rationale:** +- Corruption is a recognition issue, not camera issue +- Next frame (133ms later at 15fps) will naturally be different +- Avoids complexity of retry-within-retry scenarios +- Simpler code, same reliability + +### 5. Processing Every 4th Frame +**Decision:** Process every 4th frame (~3.75 fps from 15fps stream). + +**Rationale:** +- Balance between responsiveness and CPU usage +- SpO2/HR don't change faster than ~1 reading/second physiologically +- Allows time for processing, posting, and stability checks +- Adjusted for 15fps camera (was 6th frame for 30fps) +- Can be adjusted: 3rd frame = ~5fps, 5th frame = ~3fps + +### 6. Dark Theme HTML Review +**Decision:** Dark background with light text in review HTML. + +**Rationale:** +- Easier on eyes during long review sessions +- Better contrast for confidence scores +- Color-coded indicators more visible + +### 7. No FPS Display +**Decision:** Don't display stream FPS statistics. + +**Rationale:** +- Stream FPS is constant (15fps from camera) +- FPS logging adds noise to console output +- Processing rate (every 4th frame) is more relevant than raw stream FPS +- Keeps console focused on actual readings and warnings + +### 8. Digit Validation for '8' and '0' +**Decision:** Validate recognized '8' and '0' digits by checking for characteristic holes. 
+ +**Rationale:** +- Template matching can falsely recognize corrupted/partial digits as '8' or '0' +- '8' validation: checks for TWO holes (at 30% and 70% Y, scanning 40-50% X) +- '0' validation: checks for ONE hole (at 50% Y, scanning 30-70% X) +- Wider scan range for '0' (30-70%) than '8' (40-50%) because '0' hole is larger +- If validation fails, digit marked as -1 (invalid) → triggers retry/re-detection +- Prevents false alarms from misrecognized digits +- Simple pixel-based validation is fast and reliable + +### 9. Alarm Mode Decolorization +**Decision:** Pre-process colored alarm backgrounds before OCR to normalize them to black/white. + +**Rationale:** +- Pulse oximeter displays colored backgrounds when in alarm mode (low SpO2 or abnormal HR) +- SpO2 alarm: Yellow/orange background, HR alarm: Cyan/blue background +- Normal thresholding fails with colored backgrounds +- Decolorization converts: white digits (RGB all > 240) → white (255), everything else → black (0) +- **Applied ONLY to display regions AFTER extraction** (in `recognizeDisplayArea()` function) +- **Layout detection works on original colored frames** - grayscale conversion handles colors fine for contour detection +- **Decolorization timing**: + 1. Layout detection: Original colored frame → grayscale → threshold → find contours (works fine) + 2. Extract display regions: Still colored (280x200px = 56K pixels) + 3. Decolorize extracted regions: Only ~100K pixels total (both displays) + 4. 
Continue with OCR: grayscale → threshold → template matching +- Each display region is ~280x200px = 56K pixels +- Decolorizing two display regions (~100K pixels) takes <10ms +- Threshold of 240 ensures only pure white pixels (digits/borders) are preserved, not grays +- No performance impact - simple pixel-wise operation on small regions +- Enables reliable digit recognition regardless of alarm state +- Critical for 24/7 monitoring where alarms are expected + +--- + +## Environmental Considerations + +### Light Leakage Issues +**Problem:** Ambient light creates false contours and affects brightness detection. + +**Solutions implemented:** +- Increased brightness threshold to 170 (from lower values) +- More selective contour filtering +- Height filters to exclude oversized boxes (<200px) + +**Manual mitigation:** +Position towel/cloth to block light from reflecting off display surface. + +### Camera Positioning +**Critical:** Camera must be positioned to minimize glare and ensure consistent framing. 
+ +**Current setup:** +- Tripod-mounted camera +- RTSP stream from network camera +- Fixed zoom/focus (no autofocus) + +--- + +## Files and Directories + +### Source Files +- `pulseox-monitor.go` - Main application +- `processor.go` - Frame processor +- `validators.go` - Validation handlers +- `ocr.go` - Recognition engine +- `layout_detection.go` - Display finder +- `normalize.go` - Frame normalization +- `frame_source.go` - Frame sources (RTSP/file) +- `types.go` - Shared data structures +- `helpers.go` - Utility functions (logf) +- `homeassistant.go` - API integration +- `html_report.go` - Review page generator +- `timestamp_ocr.go` - Timestamp validation +- `config.go` - Configuration loading + +### Configuration +- `config.yaml` - Camera URL and Home Assistant settings + +### Training Data +- `training_digits/*.png` - Valid digit templates (0-9) +- `training_digits/invalid/*.png` - Corruption patterns + +### Output +- `review/` - Frame captures and digit images (cleaned each run) + - `f{N}_boxes.jpg` - Visualization of detected areas + - `f{N}_{display}_checks.png` - Digit detection analysis + - `f{N}_{display}_digit[1-3].png` - Individual digit crops + - `f{N}_{display}_full.png` - Full display with recognized value + - `review.html` - Live-updated HTML (append-only) +- `raw_frames/` - Raw processed frames for failed recognitions (cleaned each run) + - `raw_YYYYMMDD-NNNNN.png` - **TRUE raw frames** (unprocessed, portrait orientation, with timestamp) + - Only saved when recognition fails (corruption, low confidence, unrecognized) + - Used for debugging and training data collection + - **Can be tested directly**: `./pulseox-monitor raw_frames/raw_*.png` (will go through full preprocessing pipeline) +- `test_output/` - Debug visualizations (cleaned each run) + - Layout detection debug files + - Corruption debug snapshots with timestamps + +### Logs +- `pulse-monitor_YYYYMMDD_HHMMSS.log` - Detailed execution log + - Console shows only: value changes, warnings, 
errors + - File logs: all frames, timing, confidence scores, decisions + +--- + +## Development Workflow + +### Version Management + +**CRITICAL: Update version with every build!** + +**For AI Assistant (Claude):** +- ALWAYS increment version number when making ANY code changes +- Update BOTH pulseox-monitor.go and PROJECT_STATE.md +- Add entry to Version History section documenting what changed +- This is MANDATORY, not optional + +**Version increment process:** + +1. **After every code change:** + - Open `pulseox-monitor.go` + - Update `const VERSION = "v3.XX"` (increment version number) + - Document what changed in version history section below + +2. **After significant changes:** + - Update this PROJECT_STATE.md document + - Keep architecture, design decisions, and troubleshooting current + - Update "Last Verified Working" date at bottom + +3. **Commit discipline:** + - Version bump = code change + - Every feature/fix gets a version increment + - Keep PROJECT_STATE.md in sync with code + +### Adding New Training Digits + +When you find a good digit image in the review files: + +```bash +# Example: Approve f768_hr_digit3.png as digit "1" +# Creates training_digits/1_2.png (next available variant) +``` + +Current naming: `{digit}_{variant}.png` (e.g., `7_1.png`, `7_2.png`) + +### Adding Invalid Patterns + +When you find corrupted/glitchy images: + +```bash +# Move to invalid folder with descriptive name +# Creates training_digits/invalid/invalid_X.png +``` + +### Testing Changes + +1. Edit code on Mac +2. Compile: `./build.sh` +3. Run: `./pulseox-monitor` +4. Monitor console for value changes and warnings +5. Check log file for detailed diagnostics +6. 
Review HTML on shutdown (Ctrl+C) for visual verification + +### Performance Tuning + +**Frame rate:** +- Change `skipCount` parameter in NewRTSPSource call (currently 4; note: since v3.42 frame rate is time-based — adjust the 250ms minimum interval instead) +- Located in pulseox-monitor.go +- Values: 3=~5fps, 4=~3.75fps, 5=~3fps (for 15fps stream) + +**Confidence threshold:** +- Currently 85% for high confidence +- Located in multiple places in main loop +- Lower = more false positives, Higher = more retries + +**Delta threshold:** +- Currently 3 points for stability check +- Change in large delta handler +- Lower = stricter (more held readings), Higher = more lenient + +--- + +## Known Limitations + +1. **Camera-dependent:** Requires good camera positioning and lighting +2. **Display-specific:** Templates are specific to Masimo Rad-G display fonts +3. **No real-time HTML:** Review HTML only generated on shutdown +4. **Three-digit limitation:** Assumes HR displays as 1XX when >99, may need adjustment for other scenarios +5. **Brightness sensitivity:** Very bright ambient light can still cause issues + +--- + +## Future Enhancements (Planned) + +### Docker Deployment +**Target:** Ubuntu 22.04 server with Docker Compose + +**Requirements:** +- Dockerfile with Go + OpenCV +- Mount volumes: config.yaml, training_digits/, review/, logs/ +- Network access to RTSP stream and Home Assistant +- docker-compose.yml integration with existing HA setup + +**Considerations:** +- Image size: ~500MB-1GB with OpenCV +- Multi-stage build possible for smaller runtime image +- Review file access via scp or lightweight HTTP server + +### SQLite + Live HTML +**Goal:** Real-time progress viewing without stopping app + +**Approach:** +- Store frame data in SQLite as processed +- HTTP endpoint generates HTML on-demand from DB +- Access at `http://server:8080/review.html` anytime + +**Benefits:** +- No shutdown required to review progress +- Historical data queryable +- API potential for other tools + +### Adaptive Thresholds +**Idea:** Auto-adjust confidence and delta 
thresholds based on recent history + +**Example:** +- If 90% of last 100 frames >95% confidence → raise threshold to 90% +- If frequent false positives → increase delta threshold temporarily + +--- + +## Troubleshooting Guide + +### "No templates found for digit X" +**Problem:** Missing training data for a digit. + +**Solution:** Run system, review output, approve good images for that digit. + +### Frequent low confidence warnings +**Causes:** +- Camera moved/vibrated +- Lighting changed +- Focus drifted +- Display brightness changed + +**Solutions:** +- Check camera mounting +- Adjust lighting (block reflections) +- Re-focus camera if needed +- Restart to re-detect layout + +### False corruption detection +**Problem:** Valid digits matched as invalid. + +**Solution:** Review invalid templates, remove overly broad patterns. + +### Large delta causing unnecessary holds +**Problem:** Real physiological changes >3 points being held. + +**Solution:** +- Increase delta threshold (e.g., to 5) +- Or adjust hindsight validation logic +- Consider different thresholds for SpO2 vs HR + +### Values not posting to Home Assistant +**Checks:** +1. Home Assistant URL correct in config.yaml? +2. Network connectivity? +3. Sensors created in HA configuration? +4. Check log file for POST errors +5. Are values actually changing? 
(change-based posting only) + +--- + +## Performance Characteristics + +**Typical timing per frame (v3.48):** +- Frame acquisition: 150-250ms (camera waiting time) +- Preprocessing: 8-12ms +- Frame scaling: 3-5ms +- SpO2 recognition: 9-12ms +- HR recognition: 9-12ms +- Validation: 0-1ms +- File I/O: 0ms (normal), 40-50ms (failures only) +- Home Assistant POST: 5ms (when values change) +- **Total: 25-35ms per frame** (processing only, excluding camera wait) +- Timestamp check (every 10th frame): ~22ms (optimized with reused OCR client) + +**Debug mode (with /debug flag):** +- Additional file I/O: +40ms (saves all digit images) +- Total: 65-75ms per frame + +**With every 4th frame processing at 15fps:** +- ~3.75 readings per second maximum +- Actual posting rate: varies (only when values change) +- CPU usage: Low (single-threaded, mostly idle) +- Timestamp validation: Every ~2.5 seconds (~22ms each) + +--- + +## Version History + +### v3.57 (Current - True Raw Frame Saving) +- **CRITICAL FIX: Raw frames are now truly raw (before preprocessing)** + - **Problem in v3.56**: "Raw" frames were saved AFTER preprocessing (cropped, rotated, thresholded) + - **User confusion**: Testing with "raw" frames didn't work as expected - frames were already processed + - **Fix**: Clone and threshold raw frame IMMEDIATELY after acquisition, before ANY preprocessing + - **New flow**: + 1. Acquire colored frame from camera (portrait, with timestamp) + 2. **Clone frame → grayscale → threshold at 240 → save as rawThresholded** + 3. Continue with original colored frame → preprocess (crop + rotate) → threshold → OCR + 4. 
On error: Save rawThresholded to raw_frames/ + - **Result**: Saved "raw" frames are truly raw (portrait orientation, timestamp visible, not cropped/rotated) + - **File size**: ~100KB PNG (thresholded) vs 2MB colored JPG + - **Benefits**: + - Can test raw frames through full preprocessing pipeline + - See exactly what came from camera before any processing + - Small file size (thresholded) vs huge colored images + - **Impact**: When testing with raw_frames/raw_*.png, they'll go through crop+rotate+threshold like normal frames + +### v3.56 (Threshold AFTER Preprocessing) +- **CRITICAL FIX: Moved threshold to AFTER preprocessing**: Timestamp overlay was being thresholded! + - **Problem in v3.54-v3.55**: Threshold happened BEFORE preprocessing + - Colored frame → threshold → binary + - Timestamp extracted from binary frame (appeared as B&W) + - Preprocessed binary frame (cropped B&W timestamp) + - **Correct flow now**: + 1. Acquire colored frame + 2. Extract timestamp from **colored** frame (stays colored!) + 3. Preprocess colored frame (crop timestamp + rotate) + 4. **Threshold preprocessed frame** at 240 → binary + 5. Layout detection on binary + 6. 
OCR on binary + - **Why this is correct**: + - Timestamp area is removed BEFORE thresholding (stays colored) + - Only the actual display area (after crop/rotate) gets thresholded + - Single threshold at 240, happens once, at the right time + - **Impact**: Saved raw frames now show colored timestamp overlay (as expected) + +### v3.55 (Fixed Double Threshold Bug) +- **CRITICAL FIX: Removed duplicate threshold in saveThresholdedFrame()**: Frame was being thresholded TWICE + - **Problem**: saveThresholdedFrame() was doing grayscale conversion + threshold at 128 + - **But**: rawFrame parameter is ALREADY thresholded binary (from line 313 in pulseox-monitor.go) + - **Result**: Frames were being thresholded twice - once at 240, then again at 128 + - **Fix**: saveThresholdedFrame() now just saves the frame directly (no conversion, no threshold) + - **Impact**: Overlay/timestamp area should look consistent now + - This was the "extra" threshold causing B&W overlay to look different +- **Confirmed ONLY one threshold operation now**: + - pulseox-monitor.go line 292: Threshold at 240 (the ONLY threshold) + - layout_detection.go: No threshold (works on binary) + - ocr.go: No threshold in recognizeDisplayArea() (works on binary) + - processor.go: saveThresholdedFrame() just saves (no threshold) +- Note: ocr.go still has OLD deprecated recognizeDisplay() function with thresholds, but it's never called + +### v3.54 (Single Threshold Architecture - Had Double Threshold Bug) +- Attempted to threshold once at 240 after frame acquisition +- Removed internal thresholds from layout_detection.go and ocr.go +- BUT missed duplicate threshold in saveThresholdedFrame() → fixed in v3.55 + +### v3.53 (CPU Optimization - Thread Limiting) +- Removed duplicate ProcessedCount increment bug +- Limited OpenCV threads to 1 for minimal overhead + +### v3.52 (CPU Optimization Complete) +- **Cleaned up failed GPU acceleration attempts**: Removed AVFoundation backend code + - **Investigation findings**: 
GPU hardware acceleration for RTSP streams is not feasible with current OpenCV setup + - AVFoundation doesn't support network streams (RTSP), only local camera devices + - OpenCV's ffmpeg backend has VideoToolbox compiled in but doesn't auto-activate for RTSP + - Would require either: (1) patching OpenCV's videoio plugin in C++, (2) complete rewrite with different library, or (3) local transcoding proxy + - **Final optimization achieved**: CPU reduced from 50% to ~30% (40% reduction) + - Single-threaded OpenCV (gocv.SetNumThreads(1)) eliminated thread overhead + - Homebrew's OpenCV has AVFoundation/VideoToolbox support but only for local devices + - ~15% CPU for RTSP H.264 decoding (software) + ~15% CPU for OCR processing + - **Conclusion**: 30% CPU is acceptable for 24/7 monitoring, fan noise significantly reduced + +### v3.51 (Attempted GPU Acceleration - Reverted) +- **Attempted AVFoundation backend**: Failed for RTSP streams (network streams not supported) + +### v3.50 (Reduced CPU Overhead) +- **Added OpenCV thread limiting**: Dramatically reduced CPU usage and thread count + - **Problem**: OpenCV was creating 40 threads (one per CPU core) for small operations + - **Impact**: Excessive context switching, 65%+ CPU usage for simple 280x200px image operations + - **Fix**: Limited OpenCV to 1 thread with `gocv.SetNumThreads(1)` at startup + - **Rationale**: + - Our images are tiny (280x200px per display) + - We process sequentially, not in parallel + - Processing time <20ms - faster than thread spawning overhead + - Single-threaded = zero thread management overhead + - **Expected result**: + - Thread count drops from ~40 to ~5-8 (only Go runtime threads) + - CPU usage drops from 65% to <20% + - No performance loss (processing still <20ms per frame) + - **Startup message**: "šŸ”§ OpenCV threads limited to 1 (single-threaded for minimal overhead)" + - **Testing**: v3.50 with 2 threads showed 23 threads/48% CPU, so moved to 1 thread for further optimization + +### 
v3.49 (Fixed Frame Counter Bug) +- **CRITICAL FIX: Removed duplicate ProcessedCount increment**: Fixed skipped frame numbers + - **Problem**: `state.ProcessedCount++` was being called twice: + 1. In main loop (pulseox-monitor.go) - for every frame acquired + 2. In processor.go - when values changed and HASS posted + - **Result**: Every time values changed, the counter was incremented twice + - **Symptom**: Frame numbers would skip (e.g., #61, #62, skip #63, #64...) + - **Pattern**: Always skipped the frame number 2 positions after a HASS post + - **Fix**: Removed the duplicate increment in processor.go + - **Now**: Frame numbers increment consistently: #61, #62, #63, #64... + +### v3.48 (Major Performance Improvements) +- **CRITICAL: Moved digit image saves to DEBUG_MODE only**: Massive performance improvement + - **Problem**: OCR was saving 8 PNG files per frame (4 per display Ɨ 2 displays) + - checks.png visualization + - digit1.png, digit2.png, digit3.png (individual digits) + - full.png (labeled composite) + - **Impact**: ~5ms per PNG write Ɨ 8 files = 40ms of unnecessary file I/O per frame + - **Fix**: Wrapped all digit image saves in `if DEBUG_MODE` checks + - **Result**: OCR now takes ~9-12ms total (just template matching, no file I/O) + - **When needed**: Run with `/debug` flag to generate all digit images +- **Only save boxes.jpg on failures**: Reduced file I/O for successful frames + - Previously: Saved layout visualization (boxes.jpg) every time values changed (~51ms) + - Now: Only saves boxes.jpg when OCR fails (corruption, unrecognized, low confidence) + - Successful frames: No extra file I/O beyond what's needed +- **Performance comparison**: + - Before: Total ~80ms (Prep 10ms + Scale 4ms + OCR 20ms + FileIO 40ms + Valid 1ms + HASS 5ms) + - After: Total ~30ms (Prep 10ms + Scale 4ms + OCR 10ms + FileIO 0ms + Valid 1ms + HASS 5ms) + - **~60% faster processing** for normal operation +- **Debug workflow unchanged**: `/debug` flag still generates all 
images as before + +### v3.47 (Timing Instrumentation) +- **Added comprehensive timing measurements with /timing flag**: Enables detailed performance analysis + - **New /timing command line flag**: Activates timing mode (./pulseox-monitor /timing) + - **Horizontal timing table**: Shows timing breakdown for each frame + - Frame | Acquire | Prep | Scale | OCR_SpO2 | OCR_HR | Valid | FileIO | HASS | Total + - **Header printed every 20 frames**: Prevents scroll-back issues + - **Timing measurements**: + - **Acquire**: Frame acquisition from RTSP source - WAITING TIME (not processing) + - **Prep**: Preprocessing (crop timestamp + rotate 90°) + - **Scale**: Frame scaling to normalized width (if needed) + - **OCR_SpO2**: SpO2 display recognition (template matching) + - **OCR_HR**: HR display recognition (template matching) + - **Valid**: Validation checks (corruption, confidence, stability) - accumulated across all checks + - **FileIO**: Saving review images and debug files - accumulated across all file operations + - **HASS**: Home Assistant POST request (only when values change) + - **Total**: Wall-clock processing time (Prep + Scale + OCR + Valid + FileIO + logging overhead) + - **Note**: Acquire is separate (camera waiting time). Total may not equal sum due to logging/overhead. 
+ - **Purpose**: Identify performance bottlenecks (e.g., excessive file I/O) + - **Implementation**: + - Added TimingData struct in types.go + - Added TIMING_MODE global flag + - Added printTimingTable() function in helpers.go + - Added timing measurements throughout processFrame() and processFrames() + - Timing data passed through entire processing pipeline + - **Example output**: + ``` + Frame | Acquire | Prep | Scale | OCR_SpO2 | OCR_HR | Valid | FileIO | HASS | Total + ------|---------|------|-------|----------|--------|-------|--------|------|------- + #123 | 2ms | 6ms | 0ms | 4ms | 4ms | 2ms | 38ms | 5ms | 61ms + #124 | 2ms | 6ms | 0ms | 4ms | 4ms | 1ms | 39ms | 5ms | 61ms + ``` + - **Benefits**: + - Quickly identify slow operations + - Track performance trends over time + - Validate optimization efforts + - Debug unexpected delays + +### v3.46 (Fixed "1" Detection + Raw Frame Improvements) +- **CRITICAL FIX: Lowered "1" digit detection threshold from 90% to 85%** + - **Problem**: "1" digits with 87-89% confidence were being rejected, causing misrecognition + - **Example**: "71" was being cut incorrectly because the "1" wasn't detected (87.1% < 90% threshold) + - **Result**: Wrong width used (100px instead of 72px), throwing off all cut positions + - **Fix**: Threshold lowered to 85% in `hasOneAt()` function (ocr.go:196) + - **Impact**: More reliable detection of "1" digits, especially with slight variations in brightness/focus +- **Auto-enable DEBUG_MODE for file processing** + - When testing with a file (e.g., `./pulseox-monitor raw_frames/thresh_*.png`), DEBUG_MODE automatically enabled + - Eliminates need to add `/debug` flag manually for single frame testing + - Implementation: pulseox-monitor.go:65-66 in `runSingleFrameMode()` +- **Added comprehensive debug output for digit detection** + - Shows display width at start of detection + - For each digit (3→2→1): + - Check position being tested + - hasOneAt() extraction region and confidence score + - 
Whether detected as "1" or not + - Width being used (72px vs 100px) + - Running total width + - Final CUT position calculated + - Region extraction coordinates and widths for all three digits + - Makes debugging cut position issues trivial - can see exact logic flow +- **Fixed raw frame saving to be truly raw** + - **Previous**: Frames were saved AFTER preprocessing (rotated, cropped) + - **Now**: Frames saved BEFORE any processing (portrait, with timestamp, untouched) + - **Processing applied**: Only thresholded (grayscale → binary) to save space as PNG + - **Benefit**: Can test raw frames with full preprocessing pipeline + - **Implementation**: Clone frame immediately after reading, pass to processFrame() +- **Fixed duplicate logging in test mode** + - **Problem**: Messages with `Both` target were printed twice in test mode + - **Cause**: globalLogger was set to os.Stdout (same as console output) + - **Fix**: Set globalLogger = nil in test mode (pulseox-monitor.go:71) + - **Result**: Clean, non-duplicated output when testing with files + +### v3.45 (Added Processing Time Measurements) +- **Added comprehensive timing measurements**: Now shows how long each frame takes to process + - **Frame processing time**: Displayed in console output: `SpO2=97%, HR=76 bpm [45ms]` + - **Frame acquisition interval**: Logged to file showing time since last frame acquired + - **Purpose**: Understand actual performance vs camera lag vs processing bottlenecks + - **What's measured**: + - Frame acquisition interval (target: 250ms for 4 fps) + - Total processing time per frame (OCR + validation + file I/O) + - Logged for both changed values (console) and unchanged values (file only) + - **Output examples**: + - Console: `SpO2=97%, HR=76 bpm [45ms]` (when value changes) + - Log file: `Frame #123: SpO2=97%, HR=76 bpm (no change) - processed in 42ms` + - Log file: `[TIMING] Frame acquired: +251ms since last (target: 250ms)` + - **Benefits**: + - Identify if processing time is exceeding 
250ms target + - See actual frame rate vs target 4 fps + - Distinguish between camera timestamp lag and processing delays + - Debug performance issues with concrete data + - This addresses confusion about camera timestamp lag vs actual processing performance + +### v3.44 (Save Thresholded PNGs, Not Colored JPGs) +- **Changed error frame format**: Now saves thresholded black/white PNG instead of colored JPG + - **Rationale**: Thresholded frames show exactly what the OCR sees, making debugging much more effective + - **Previous**: Saved raw colored camera frames that required manual processing to debug + - **Now**: Saves thresholded (binary) PNG showing the exact image fed to template matching + - **Processing**: Grayscale conversion → threshold at 128 → save as PNG + - **Filename format**: `raw_frames/thresh_YYYYMMDD-NNNNN.png` + - **Saved on**: + - Corruption detection (invalid template match) + - Unrecognized digits (negative values) + - Low confidence (first attempt) + - Low confidence after retry + - **Benefits**: + - Can replay frames directly through OCR pipeline + - See exactly what template matching is working with + - Identify missing templates or threshold issues immediately + - No need to manually preprocess frames for debugging + - PNG format (lossless) vs JPG (lossy with compression artifacts) + - **Storage**: ~50-100KB per frame vs 200-400KB for colored JPG + +### v3.43 (Fixed Timing + Removed Raw Frames) +- **Fixed frame acquisition timing**: 250ms timer now starts from frame ACQUISITION, not processing completion + - **Previous behavior**: Waited 250ms after processing finished (variable frame rate) + - **New behavior**: Waits 250ms from when frame was acquired (consistent 4 fps) + - **Implementation**: Set `lastProcessedTime = now` BEFORE returning frame, not after processing + - **Example**: If processing takes 50ms, next frame acquired at exactly 250ms from previous acquisition + - **Result**: Guaranteed exactly 4 fps regardless of processing time 
variations +- **Removed raw frame saving**: System no longer saves unprocessed camera frames + - **Rationale**: Thresholded digit images (step4) are already saved for all failures + - **What's saved on errors**: + - review/f{N}_spo2_digit1.png, digit2, digit3 (thresholded crops) + - review/f{N}_hr_digit1.png, digit2, digit3 (thresholded crops) + - review/f{N}_spo2_full.png, hr_full.png (labeled displays) + - review/f{N}_spo2_checks.png, hr_checks.png (visualization) + - **Removed**: + - raw_frames/ directory no longer created + - rawFrame parameter removed from processFrame() + - rawFrame cloning removed from processFrames() loop + - **Benefits**: Reduced memory usage, simpler code, less disk I/O + - Thresholded images are more useful for debugging than raw camera frames + +### v3.42 (Time-Based Frame Processing) +- **Changed from skip-based to time-based frame processing**: More reliable and precise timing + - **Previous approach**: Process every 4th frame (variable timing depending on stream fps) + - **New approach**: Process frames at exactly 4 fps (250ms minimum interval) + - **Implementation**: + - Track `lastProcessedTime` timestamp + - Only process frame if 250ms has elapsed since last processed frame + - Guarantees consistent 4 fps processing rate regardless of stream fps + - **Benefits**: + - Predictable, consistent processing rate + - Works correctly even if stream fps varies + - Simpler to understand and configure + - **Updated NewRTSPSource()**: Removed `skipCount` parameter, now uses hardcoded 250ms interval + - Console message: "šŸ“Š Processing frames at 4 fps (250ms minimum interval)" +- **Confirmed**: All step debug images only saved in DEBUG_MODE + - step1_original.png (only with /debug flag) + - step2_decolorized.png (only with /debug flag) + - step3_grayscale.png (only with /debug flag) + - step4_thresholded.png (only with /debug flag) + +### v3.41 (Critical Performance Fix) +- **CRITICAL: Fixed excessive debug image writes causing high resource 
usage** + - **Problem**: Saving 8 debug images PER FRAME (4 per display x 2 displays) + - **Impact**: At 3.75 fps = ~30 images/second = massive disk I/O and CPU usage + - **Images being saved on every frame**: + - step1_original.png (colored extraction) + - step2_decolorized.png (after alarm mode decolorization) + - step3_grayscale.png (after grayscale conversion) + - step4_thresholded.png (after binary threshold) + - **Fix**: Only save these debug images when DEBUG_MODE is enabled + - **Result**: Normal operation now only saves essential files (digit crops, checks, full) + - **To enable debug images**: Run with `./pulseox-monitor /debug` flag + - **Performance improvement**: ~70% reduction in disk I/O, significant CPU savings + - This was a leftover from debugging alarm mode decolorization that should never have been in production + +### v3.40 (Actionable Layout Detection Errors) +- **Enhanced layout detection error diagnostics**: Now saves debug visualization when "Only 1 digit display found" error occurs + - **Problem**: Error message was not actionable - couldn't see what boxes were detected or why they failed + - **Fix**: Save timestamped debug image showing all center boxes with labels + - **Debug visualization shows**: + - All boxes crossing 50% line (center region) + - Green boxes = qualified as digit displays (height >= 110px) + - Cyan boxes = rejected (too short) + - Box labels showing dimensions: "#0: H=XXpx OK" or "#1: H=XXpx TOO SHORT" + - Red line showing 50% Y position + - Summary at top: "Found: X center boxes, Y digit displays (need 2)" + - **Enhanced console error output**: + - Shows count of center boxes found + - Lists each box with dimensions and qualification status + - "āœ“ QUALIFIED" for boxes that passed height requirement + - "āœ— TOO SHORT (< 110px)" for boxes that were rejected + - Path to debug image: test_output/layout_error_YYYYMMDD_HHMMSS.jpg + - Makes debugging layout failures much faster - can immediately see what system detected + 
+### v3.39 (Fixed Frame Scaling Order) +- **CRITICAL FIX: Frame scaling now happens in correct order** + - **Problem**: Boxes were calculated on original frame, then frame was scaled, causing coordinate mismatch + - **Root cause**: Layout detection calculated boxes on unscaled frame, but processFrame() scaled the frame later + - **Fix**: Layout detection now scales frame IMMEDIATELY after calculating scale factor (Step 4) + - **Process flow**: + 1. detectScreenLayoutAreas() calculates scale from bounding box width + 2. Scales frame to 860px width using gocv.Resize() + 3. **Scales ALL coordinate arrays** (bounding box, allBoxes, significantBoxes) + 4. ALL subsequent processing (contour detection, box calculation) uses scaled frame AND scaled coordinates + 5. Returns boxes in scaled coordinates + scale factor + 6. processFrame() scales every frame using same scale factor + 7. Boxes and frames now match perfectly + - **Code changes**: + - layout_detection.go: Added frame scaling at Step 4 (after scale calculation) + - layout_detection.go: Scale all coordinate arrays (scaledBoundingBox, scaledAllBoxes, scaledSignificantBoxes) + - All debug visualizations now use scaled frame (step 5-15 images) + - Removed tryAcquireLayout() wrapper function (unnecessary abstraction) + - pulseox-monitor.go: Calls detectScreenLayoutAreas() directly, stores layout+scale in state + - processor.go: Keeps scaling logic in processFrame() (needed for every frame) + - **Benefits**: + - Boxes calculated on 860px frame match frames that are scaled to 860px + - No more coordinate mismatches between layout detection and OCR processing + - Simpler code flow - no useless wrapper functions + - Alarm box trimming (Steps 8-10) preserved and working correctly + - **Verified**: Bounding box width log will show ~860px after scaling (was variable before) +- **BUG FIX: Review entries now use correct frame numbers** + - **Problem**: Successful readings used `state.FrameCount` (always 0) instead of actual 
`frameNum` + - **Result**: All images referenced f0_* instead of actual frame numbers + - **Fix**: Changed 3 instances in processor.go to use `frameNum` parameter + - Affected: Invalid physiological values, successful readings, unstable readings + - Failed readings (corruption, unrecognized, low confidence) were already correct +- **OPTIMIZATION: Only save images for frames added to review** + - **Problem**: Every processed frame saved images, even if values didn't change + - **Result**: review/ folder filled with unused images from StatusNoChange frames + - **Fix**: Delete digit images when values haven't changed (StatusNoChange) + - Saves disk space and makes review folder only contain relevant frames + - Deleted files: f*_spo2_checks.png, f*_spo2_digit[1-3].png, f*_spo2_full.png, f*_hr_*.png + +### v3.38 (Dynamic Frame Normalization) +- **Implemented dynamic frame scaling based on bounding box width**: Handles unstable device position + - **Problem**: Pulse oximeter lays loose on bed, causing frame width to vary + - **Solution**: Measure bounding box width in Step 4 of layout detection + - **Process**: + 1. Calculate scale factor: `scale = 860 / boundingBoxWidth` + 2. Log bounding box width and scale factor to console + 3. Store scale factor in ProcessingState.LockedScale + 4. 
Apply scaling to every frame before OCR processing + - **Changes**: + - `detectScreenLayoutAreas()` now returns `(*ScreenLayout, float64, error)` - includes scale factor + - `tryAcquireLayout()` captures and stores scale in state + - `processFrame()` applies scaling if `state.LockedScale != 1.0` + - Logs: "šŸ“Š Bounding box width: Xpx, Scale factor: Y (target: 860px)" + - **Benefits**: + - Consistent 860px normalized width regardless of camera position/zoom + - Layout coordinates remain stable across frames + - OCR accuracy maintained even when device moves + +### v3.37 (Fixed Layout Detection Height Threshold) +- **Fixed digit display height threshold**: Changed from > 120px to >= 110px + - **Problem**: Layout detection was failing because digit displays were exactly 110-120px tall + - **Previous code**: Required height > 120px (too restrictive) + - **Fixed code**: Requires height >= 110px (matches MIN_BOX_HEIGHT constant) + - **Added debug logging**: Shows dimensions of each center box and whether it's accepted/rejected + - Helps diagnose layout detection failures + - Consistent with original design intent (MIN_BOX_HEIGHT = 110) + +### v3.36 (Unified Logging) +- **Consolidated all logging to use logMessage() function**: Replaced all fmt.Printf(), fmt.Println(), and inconsistent logging patterns + - **ocr.go**: Converted template loading messages to use logMessage() + - Template warnings → Console + Warning level + - Template loading success → Console + Info level + - **frame_source.go**: Converted frame processing info message + - "Processing every Nth frame" → Console + Info level + - **normalize.go**: Converted all debug output messages + - Decolorization progress → LogFile + Debug level + - Debug file saves → LogFile + Debug/Info level + - **All files now use consistent logging pattern**: + - Startup/shutdown → Console + Info + - Main readings → Both + Info + - Warnings/errors → Both + Warning/Error + - Debug messages → LogFile + Debug (respects DEBUG_MODE 
flag) + - Progress indicators → Console + Info + - **Benefits**: + - Single source of truth for logging behavior + - Consistent timestamp formatting across all logs + - Proper level-based filtering + - Clean separation of console vs file logging + - Debug mode properly controls debug output + +### v3.35 (Logging System Implementation) +- **Implemented logMessage() function in helpers.go**: New unified logging system + - Target: Console, LogFile, Both + - Level: Debug, Info, Warning, Error + - Single-letter prefixes (D, I, W, E) for clean output + - Automatic DEBUG_MODE filtering for debug messages + - Timestamp formatting: [HH:MM:SS.mmm] + - Eliminates need for checking if logger != nil throughout codebase + +### v3.34 (Correct Order + Debug Images) +- **Fixed order of layout detection**: Center region FIRST, digit displays SECOND + - **Correct process order**: + 1. Find ALL contours with RetrievalList + 2. Collect all significant boxes (>50px width or height) + 3. Calculate bounding box from ALL boxes (not just digit displays) + 4. Find 50% Y line from overall bounding box + 5. Filter to boxes crossing 50% line (center region) + 6. From center boxes, identify digit displays (height >= 110px) + 7. Find alarm icons (center boxes extending beyond digit displays) + 8. Trim digit displays based on alarm icons + 9. Split by center X + 10. Find rightmost X in each half (using box center) + 11. 
Create fixed-width boxes (280px) + - **Debug images at every step** saved to test_output/: + - layout_step1_grayscale.jpg + - layout_step2_threshold.jpg + - layout_step3_eroded.jpg + - layout_step4_all_boxes.jpg (all boxes >50px in green) + - layout_step5_center_boxes.jpg (boxes crossing 50% line in cyan, red line) + - layout_step6_digit_displays.jpg (digit displays in magenta) + - layout_step7_alarm_icons.jpg (alarm icons in yellow, if any found) + - layout_step8_trimmed_displays.jpg (trimmed displays in green) + - layout_step9_final_boxes.jpg (final SpO2/HR boxes with labels) + - Key fix: Bounding box calculated from ALL contours, not just digit displays + - This prevents missing digit displays that are outside early bounding calculations + +### v3.33 (Contour Debug Logic + Center Region - WRONG ORDER) +- **Implemented contour_debug.go logic combined with center region approach**: As instructed + - **Exact implementation of contour_debug.go detection:** + 1. Find ALL contours with RetrievalList + 2. First pass: Identify digit displays (height >= 110px) + 3. Second pass: Find alarm icons (boxes extending beyond digit displays on right) + 4. Trim digit displays: Find leftmost alarm icon edge that overlaps, set as right boundary + - **Combined with center region logic:** + 5. Calculate bounding box from trimmed digit displays + 6. Find 50% Y line + 7. Filter to displays crossing 50% line (center region) + 8. Split by center X + 9. Find rightmost X in each half (using box center to determine which half) + 10. 
Create fixed-width boxes (CUT_WIDTH = 280px) + - **Key differences from v3.32:** + - Works with TRIMMED digit displays (after alarm icon trimming) + - Alarm icons properly detected and used to trim display boundaries + - Follows exact contour_debug.go algorithm before applying center logic + - Diagnostic output shows trimming actions when alarm icons detected + +### v3.32 (Fixed Layout Detection - WRONG APPROACH) +- **Reverted to proven layout detection logic**: Fixed incorrect box detection from v3.30 + - **Problem in v3.30/v3.31**: Alarm icon detection was finding wrong boxes (yellow alarm square instead of digit displays) + - **Fix**: Restored working center region approach without alarm icon complications + - **Process**: + 1. Find all significant contours (>50px width or height) + 2. Calculate overall bounding box + 3. Filter to boxes crossing 50% Y line with height >= 110px (digit displays) + 4. Create center region from filtered boxes + 5. Split by center X + 6. Find rightmost X in each half using **box center** to determine which half + 7. Create fixed-width boxes (CUT_WIDTH = 280px) + - Key insight: Use box center (not box edge) to determine left vs right half + - Alarm icon trimming removed for now - adds complexity without clear benefit + - Focuses on reliable detection of actual digit display boxes + - Debug images from v3.31 retained for troubleshooting + +### v3.31 (Decolorization After Extraction) +- **Moved decolorization to after display area extraction**: Layout detection now works on original colored frames + - **Problem**: Layout detection was trying to threshold colored alarm backgrounds, failing to find displays + - **Fix**: Layout detection now works on original colored frame (grayscale + threshold) + - **Decolorization moved**: Now happens in `recognizeDisplayArea()` AFTER extracting the small display regions + - **Process flow**: + 1. Preprocess (crop + rotate) - original colored frame + 2. 
Detect layout on colored frame (grayscale → threshold → find contours) + 3. Extract display areas (280x200px) - still colored + 4. **Decolorize extracted regions** (only ~56K pixels per display) + 5. Continue with OCR (grayscale → threshold → template matching) + - **Debug images added**: Each processing step now saves debug images: + - `f{N}_{display}_step1_original.png` - Original extracted colored region + - `f{N}_{display}_step2_decolorized.png` - After decolorization + - `f{N}_{display}_step3_grayscale.png` - After grayscale conversion + - `f{N}_{display}_step4_thresholded.png` - After thresholding (ready for OCR) + - Allows visual verification of each processing step + - Decolorization only affects OCR, not layout detection + +### v3.30 (Simplified Layout Detection) +- **Simplified layout detection with alarm icon trimming**: Combined best of both approaches + - **Uses RetrievalList** to find ALL contours (including nested ones like alarm icons) + - **Single contour pass**: No more complicated center region extraction and re-detection + - **Alarm icon detection**: Finds boxes that extend beyond digit displays (clock icons, alarm indicators) + - **Smart trimming**: Trims BOTH SpO2 and HR displays if alarm icons found, otherwise uses regular boxes + - **Process**: + 1. Find all contours with RetrievalList + 2. Identify digit displays (height >= 110px) + 3. Find alarm icons (boxes extending beyond digit displays on the right) + 4. Trim digit displays based on leftmost alarm icon edge (if any) + 5. Split displays into left (SpO2) and right (HR) halves using center X + 6. Find rightmost X in each half for display boundaries + 7. 
Create fixed-width boxes (CUT_WIDTH = 280px) + - Much simpler and more reliable than previous double-contour approach + - Combines center region logic with direct contour approach from contour_debug.go + +### v3.29 (Correct Decolorization Scope) +- **Fixed decolorization scope**: Removed wasteful full-frame decolorization + - **Problem in v3.28**: Decolorizing entire 1390x2736 frame (3.8M pixels) took ~2000ms + - **Reality**: Decolorization only needed for small display regions during OCR + - **Correct flow**: + 1. Preprocess (crop + rotate) - original colored frame + 2. Detect layout on original frame (works fine with colored backgrounds) + 3. Extract display areas (280x200px each) + 4. Decolorize ONLY display regions in `recognizeDisplayArea()` (~100K pixels) + 5. Run OCR on decolorized regions + - Decolorization already exists in `ocr.go` at the right place (line ~281) + - No need for full-frame decolorization - layout detection works on colored frames + - **Performance**: ~40x faster (decolorizing 100K pixels vs 3.8M pixels) + +### v3.28 (Decolorization Timer - REVERTED) +- **Added decolorization timing**: Timer measures how long decolorizeFullFrame() takes + - Timer added after preprocessing in main processing loop + - Displays: "[TIMER] Decolorization took: Xms" + - **Restored decolorization to pipeline**: Was defined but not being called! 
+ - Decolorization now runs between preprocessing and layout detection + - Enables alarm mode support (colored backgrounds → black/white) + - Allows measurement and optimization of decolorization performance + - Frame lifecycle: raw → preprocess → decolorize → detect/process + +### v3.27 (Save ACTUAL Raw Frames) +- **CRITICAL FIX: Raw frames are now truly raw (unprocessed)** + - **Problem**: `raw_frames/raw_*.png` files were being saved AFTER preprocessing (rotation + crop) + - **Result**: When testing with "raw" frames, they were rotated AGAIN, making displays vertical instead of horizontal + - **Fix**: Clone raw frame immediately after reading from source (line 233) + - **Flow now**: + 1. Read frame from source (portrait orientation) + 2. Clone raw frame → keep for potential error saving + 3. Preprocess: crop timestamp + rotate 90° CW + 4. Decolorize + 5. Process + 6. IF error (corruption, low confidence, unrecognized) → save RAW clone + 7. Close raw frame clone + - Raw frames saved in `processor.go` at error points using the `rawFrame` parameter + - Raw frames are now truly unprocessed - can be used for testing by running through full pipeline + - Fixes issue where testing with raw frames would double-rotate them + +### v3.26 (Fixed Redundant Decolorization) +- **CRITICAL FIX: Eliminated 4x redundant decolorization calls**: Now decolorizes ONCE at top level only + - **Problem**: Frame was being decolorized 4 times for every layout detection: + 1. In `detectScreenWidth()` (2668x1458 - full frame) + 2. In `saveNormalizationDebug()` (2668x1458 - same frame again!) + 3. In `detectScreenLayoutAreas()` (2130x1164 - center region) + 4. In `detectScreenLayoutAreas()` center region (859x518 - smaller region) + - **Each call took 2-3 seconds** - total 8-12 seconds wasted per frame! 
+ - **Fix**: Removed ALL decolorization calls from normalize.go and layout_detection.go + - **Now**: Single decolorization in `pulseox-monitor.go` at line 259-267 + - **Result**: Frame processing time reduced from ~12 seconds to ~3 seconds + - Decolorized frame passed to all layout functions - they expect pre-decolorized input + - Simplified `decolorizeFullFrame()` output - removed progress indicators (10%, 20%, etc.) + - Added comments documenting that functions expect already-decolorized frames + +### v3.25 (Aggressive Debug Output) +- **Added extensive debug image saves**: Every processing step now saves an image to test_output/ + - step1_original.jpg - Frame after preprocessing (rotated) + - step2_decolorized.jpg - After decolorizeFullFrame() (should be black/white) + - step3_grayscale.jpg - After grayscale conversion + - step4_thresholded.jpg - After binary threshold +- **Added decolorization progress tracking**: + - Shows progress every 10% of rows processed + - Shows final statistics: white pixel count/percentage, black pixel count/percentage + - Helps diagnose if decolorization is working correctly +- **Purpose**: Debug why alarm mode decolorization appears to not be working + +### v3.24 (Fixed Layout Decolorization) +- **Fixed missing decolorization in layout detection**: Layout detection was still using colored frames + - Added `decolorizeFullFrame()` call in `detectScreenLayoutAreas()` function + - Previously: Only `detectScreenWidth()` was decolorizing, but `detectScreenLayoutAreas()` was not + - Problem: Layout detection was converting colored frame directly to grayscale + - Result: Alarm mode colored backgrounds prevented contour detection + - Fix: Decolorize BEFORE grayscale conversion in both contour detection passes: + 1. Initial contour detection (full frame) + 2. 
Center region contour detection + - Now: Layout detection works correctly in both normal and alarm modes + - This was the missing piece preventing alarm mode from working end-to-end + +### v3.23 (Higher White Threshold) +- **Increased white pixel detection threshold**: Changed from 200 to 240 for more selective white detection + - Updated in both `decolorizeFullFrame()` (normalize.go) and `decolorizeAlarmMode()` (ocr.go) + - Previous threshold (200): Too permissive, caught near-white/gray pixels + - New threshold (240): More selective, only truly white pixels (near 255) are preserved + - Logic: If RGB all > 240 → keep as white (255), else → convert to black (0) + - Rationale: Display digits are pure white (255), not gray, so higher threshold is more accurate + - Better separation between white digits/borders and colored alarm backgrounds + +### v3.22 (Decolorize Before Layout) +- **Moved decolorization before layout detection**: Alarm mode now works end-to-end + - New `decolorizeFullFrame()` function in normalize.go + - Applied BEFORE layout detection (in `detectScreenWidth()`) + - Previously: Decolorization only happened during OCR (too late) + - Now: Decolorization happens first, then layout detection sees clean white-on-black displays + - Fixes issue where colored alarm backgrounds prevented layout detection + - Layout detection flow: + 1. Decolorize full frame (yellow/cyan → black, white → white) + 2. Convert to grayscale + 3. Threshold at 170 + 4. 
Find contours + - Same decolorization also applied in `saveNormalizationDebug()` for accurate debug visualization + - Result: System can now detect and recognize displays in both normal and alarm modes + +### v3.21 (Preprocessing Debug) +- **Added debug output for preprocessing**: Prints frame dimensions before and after preprocessing + - Shows: "Before preprocess: WxH" and "After preprocess: WxH" + - Helps verify that rotation is actually happening + - Expected: Portrait (e.g., 1080x1920) → Landscape (e.g., 1920x1080) + - Temporary diagnostic output to verify preprocessFrame() is working + +### v3.20 (Unified Preprocessing) +- **Verified unified preprocessing path**: Both RTSP streaming and file test modes use identical preprocessing + - All frames go through `preprocessFrame()` function: + 1. Crop timestamp (top 68 pixels) + 2. Rotate 90° clockwise (portrait → landscape) + - Applied in unified `processFrames()` loop (line 250) + - Works for both `RTSPSource` and `FileSource` + - **Important**: Frames in `raw_frames/` are saved AFTER preprocessing + - Therefore: Raw frames are already rotated and ready for testing + - If testing with unrotated frames, they'll go through preprocessing automatically + - Ensures consistent behavior regardless of frame source + - Layout detection expects landscape orientation (SpO2 and HR side-by-side horizontally) + +### v3.19 (Alarm Mode Support) +- **Added alarm mode decolorization**: System now recognizes digits even when pulse oximeter is in alarm state + - New `decolorizeAlarmMode()` function converts colored alarm backgrounds to black + - Alarm mode displays: + - SpO2: Yellow/orange background with white digits + - HR: Cyan/blue background with white digits + - Decolorization logic: + - Pixels where ALL RGB values > 200 → kept as white (digits) + - All other pixels → converted to black (colored backgrounds, borders) + - Applied before grayscale conversion in OCR pipeline + - Enables normal thresholding and template 
matching to work correctly + - Previously: Colored backgrounds would confuse thresholding, causing OCR failures + - Now: Alarm mode digits recognized just as reliably as normal mode + - Example: "90" in yellow box → converted to "90" in black/white → OCR succeeds + +### v3.18 (Zero Validation) +- **Added '0' digit validation**: Prevents false '0' recognition by checking for characteristic center hole + - New `validateZero()` function checks for a hole at 50% Y (center) + - Scans horizontally from 30%-70% X looking for black pixels (hole) + - Wider horizontal range than '8' validation (30-70% vs 40-50%) since '0' hole is larger + - If validation fails, marks digit as invalid (-1) triggering retry/re-detection + - Applied to all three digit positions (digit1, digit2, digit3) + - Complements existing '8' validation (two holes at 30% and 70% Y) + - Helps prevent misrecognition of corrupted/partial digits or digits with missing centers as '0' + - Example log output: + ``` + validateZero: center hole @ y=50 (50%) x=15-35 (true) + ``` + +### v3.17 (Enhanced Cleanup) +- **Enhanced directory cleanup on startup**: Now cleans all output directories in streaming mode + - Previously: Only `review/` was cleaned on startup + - Now: Cleans `review/`, `raw_frames/`, and `test_output/` in streaming mode + - Single-frame test mode: No cleanup (preserves test output) + - Progress display: Shows each directory being cleaned with checkmarks + - Example output: + ``` + šŸ—‘ļø Cleaning output directories... + - review/... āœ“ + - raw_frames/... āœ“ + - test_output/... 
āœ“ + ``` + - Rationale: `raw_frames/` accumulates failed recognition frames over time; clean slate each run + - Rationale: `test_output/` contains debug visualizations that should be fresh per session + +### v3.16 (Append-Only HTML Review) +- **Implemented append-only HTML generation**: Review HTML is now written incrementally instead of all at once on shutdown + - HTML header written once at startup via `initReviewHTML()` + - Each frame entry appended immediately via `appendReviewEntry()` as it's processed + - Footer written on shutdown via `closeReviewHTML()` + - Browser handles missing closing tags gracefully, allowing live refresh + - Benefits: + - āœ… Live updates: refresh browser anytime to see latest frames + - āœ… No regeneration overhead: just one file write per frame + - āœ… Can run for hours without memory/performance issues + - āœ… No data loss if process crashes (all processed frames already written) + - All review entry points now consistently call `appendReviewEntry()`: + - Corruption detection + - Unrecognized digits + - Low confidence (both first attempt and retry) + - Invalid physiological values + - Successful readings + - Unstable readings (held for validation) + - Fixed duplicate `appendReviewEntry()` calls in corruption handler (was calling 3 times) + +### v3.15 (Fixed Eight Validation) +- **Fixed validateEight() probe positions**: Corrected hole detection based on actual measurements + - Issue: Was probing at 33% and 67% which hit the white digit and middle bar + - Fix: Now checks horizontal line segments at 30% and 70% height + - Scans X from 40% to 50% of width (10% range centered around middle) + - Looks for ANY black pixel (< 128) in that range = hole detected + - Much more robust than single pixel check - tolerates slight variations + - Holes are BLACK (empty space), digit is WHITE in thresholded image + - Prevents false rejection of valid "8" digits + +### v3.14 (Corruption Debug Output) +- **Added debug output for corruption cycles**: 
Helps diagnose repeated corruption detection + - When corruption is detected, saves debug files to `test_output/` with timestamp + - Saves normalized frame: `corruption_YYYYMMDD_HHMMSS_frameN_normalized.png` + - Saves layout visualization: `corruption_YYYYMMDD_HHMMSS_frameN_layout.jpg` + - Also includes all digit crops from review/ directory (already generated by OCR) + - Allows offline analysis when system gets stuck in corruption → re-detect → corruption cycles + - Log message: "šŸ’¾ Debug saved: YYYYMMDD_HHMMSS" + +### v3.13 (RTSP Reconnect Fix + Eight Validation) +- **Fixed segmentation fault on RTSP reconnect failure**: Prevents crash when stream reconnection fails + - Added null check before reading from stream + - Sets stream to nil and marks source as closed after failed reconnect + - Returns shouldContinue=false to stop processing instead of attempting to read from null stream + - Prevents SIGSEGV when VideoCapture is null +- **Added '8' digit validation**: Prevents false '8' recognition by checking for two characteristic holes + - New `validateEight()` function probes two specific regions (at 1/3 and 2/3 height) + - Checks if both regions are at least 60% black (indicating holes) + - If validation fails, marks digit as invalid (-1) triggering retry/re-detection + - Applied to all three digit positions (digit1, digit2, digit3) + - Helps prevent misrecognition of corrupted/partial digits as '8' + +### v3.12 (Physiological Value Validation) +- **Added minimum value threshold for Home Assistant posting**: Prevents posting physiologically impossible values + - Validation rule: Both SpO2 and HR must be >= 40 to post to Home Assistant + - Values below 40 are logged to file and added to review, but NOT posted to HASS + - Console displays: "āš ļø Values below 40 - not posting to HASS" + - Rationale: SpO2 or HR below 40 are not physiologically possible in living patients + - Prevents false alarms and invalid data in Home Assistant database + - Common scenarios 
triggering this: + - Pulse oximeter disconnected (shows dashes, may be misread as low numbers) + - Recognition errors resulting in single-digit values (0-9) + - Display corruption being partially recognized + +### v3.11 (Reduced Raw Frame Storage) +- **Raw frames now only saved on recognition failures**: Significantly reduces disk space usage + - Previously: Every processed frame saved to `raw_frames/` (all successes and failures) + - Now: Only saves frames when recognition fails (corruption, unrecognized, low confidence) + - Successful readings and unstable readings (held for validation) no longer saved + - Failure cases where frames are saved: + - āŒ Corrupted frames (invalid pattern matched) + - āŒ Unrecognized digits (negative values) + - āŒ Low confidence readings (both first attempt and retry) + - Typical result: ~90% reduction in raw frame storage for stable monitoring + - Failed frames still available for debugging and training data collection + +### v3.10 (Corruption Frame Number) +- **Added frame number to corruption log message**: Easier debugging of false corruption detections + - Message now shows: `[CORRUPTION] Frame #123 - Digit = -1 detected: SpO2(9,5) HR(7,-1) - skipping frame` + - Helps identify which specific frame triggered invalid pattern match + - Useful for reviewing frames in HTML output and approving/removing invalid templates + +### v3.9 (Simplified Digit Detection Arrays) +- **Simplified digit detection to use only arrays**: Removed all intermediate variables + - `D[1,2,3]` array stores cut positions for digit extraction + - `digitIsOne[1,2,3]` array stores detection results (is it a "1"?) + - Eliminated intermediate variables like digit1IsOne, digit2IsOne, digit3IsOne, middleStart, rightStart + - Code now directly uses array notation throughout: `digitIsOne[2]`, `D[3]`, etc. 
+ - Cleaner, more maintainable code with single source of truth +- **Fixed "13" recognition bug**: D1 position now correctly calculated from accumulated widths + - Loop accumulates widths: digit3 → digit2 → digit1 + - D1 check position = w - digit3Width - digit2Width - 5 + - Previously was checking at wrong position when only 2 digits present + +### v3.8 (Code Cleanup & Digit Detection Loop) +- **Added helpers.go**: New file with `logf()` helper function + - Eliminates repetitive `if logger != nil` checks throughout codebase + - Makes logging calls cleaner and more maintainable + - Applied across processor.go, validators.go, and ocr.go +- **Refactored digit detection to use loop**: Replaced repetitive digit checking code with elegant loop + - Uses arrays for digit names, detection results, and widths + - Accumulates width progressively (digit3 → digit2 → digit1) + - More maintainable and easier to understand + - Eliminates code duplication + +### v3.7 (3-Digit Value Calculation Fix) +- **Fixed 3-digit value calculation bug**: Removed score requirement for digit1 detection + - Issue: When `digit1IsOne` detected a "1" digit, code also required template match score > 50% + - Problem: digit1 region includes empty padding, causing low template match scores + - Result: 106 was calculated as 6, 103 as 3, 104 as 4 + - Fix: Trust `hasOneAt()` detection alone - if digit1IsOne is true, use 100 + num2*10 + num3 + - The `hasOneAt()` function already confirms there's a "1" pattern at the correct position + +### v3.6 (Millisecond Timestamps) +- **Added millisecond precision to timestamps**: Changed timestamp format from `15:04:05` to `15:04:05.000` + - Provides more precise timing for frame processing and reading logs + - Helpful for debugging timing issues and frame processing delays + - Format: `[03:03:17.245] SpO2=93%, HR=85 bpm` + +### v3.5 (Frame Rate + Timestamp Validation) +- **Frame rate adjustment**: Changed from 30fps to 15fps camera + - Adjusted processing from every 
6th frame to every 4th frame + - Processing rate: ~3.75fps (was ~5fps) + - Camera was struggling with 30fps, reduced to 15fps +- **Timestamp validation**: Added OCR check of camera timestamp + - Validates timestamp every 10 processed frames (~2.5 seconds) + - Warns if camera time differs by >3 seconds from server + - Uses optimized gosseract library (~22ms per check) + - **Optimizations**: Downscale 50%, grayscale, Otsu threshold, invert, PNG encode + - **Reused OCR client** for major speed boost (was 120ms, now 22ms warm) + - Silent when drift is within ±3 seconds + - Helps detect frozen/lagging streams +- **New file**: `timestamp_ocr.go` - Timestamp extraction and validation + +### v3.4 (Low Confidence Counter) +- **Added low confidence counter**: Tracks and displays frequency of low confidence frames + - New `LowConfidenceCount` field in ProcessingState + - Console displays count when it increments: "Low confidence (#X)" + - Helps identify camera/lighting issues needing attention + - Counter appears for both first detection and retry failures + +### v3.3 (Corruption Detection Fix) +- **Fixed corruption detection**: Direct check prevents invalid frames from reaching stability + - Changed from `validateCorruption()` wrapper to direct `IsCorrupted()` call + - ANY digit = -1 (invalid template match) now immediately returns StatusCorrupted + - Prevents invalid/corrupted frames from being marked as "unstable" + - Ensures clean separation: corruption → silent skip, low confidence → retry + +### v3.2 (Review & Signal Handling) +- **Fixed Ctrl+C handling**: Simplified signal handling to prevent segfaults + - Removed stream.Close() from signal handler (was causing SIGSEGV) + - Stream cleanup now happens naturally on program exit + - Sets closed flag only, checks happen between frame reads +- **Enhanced review.html**: Failed frames now show all digit images + - Corruption, low confidence, and unrecognized frames display digit crops + - Allows reviewing and approving digits 
from failed recognitions + - Red background distinguishes failed frames +- **Silenced corruption detection**: Invalid patterns work quietly + - Corrupted frames no longer appear in console or HTML + - Only logged to file for debugging purposes + - Keeps review focused on actionable items + +### v3.1 (Optimizations) +- **Removed FPS display**: Cleaned up console output + - No more "Stream FPS: X (avg over Y frames)" messages + - Simpler, focused console showing only readings and warnings +- **Negative value handling**: Unrecognized digits now trigger immediate retry + - SpO2 or HR < 0 (OCR returned -1) treated as low confidence + - Triggers immediate next frame read + layout re-detection + - Prevents false "unstable" warnings for blurry/unrecognized frames +- **Frame rate adjustment**: Changed from every 4th to every 6th frame + - ~5 fps processing rate from 30fps stream + - Better balance for 30fps camera (was tuned for 15fps) + +### v3.0 (Refactored) +- **Modular architecture**: Split monolithic pulse-monitor.go into focused modules + - processor.go: Frame processing orchestration + - validators.go: Validation logic + - frame_source.go: Abstracted sources (RTSP/file) + - types.go: Shared data structures + - normalize.go: Frame normalization +- **Smart escalation strategy**: Progressive failure handling + - 1st failure: Try next frame (0s wait) + - 2nd failure: Re-detect layout (0s wait) + - 3rd failure: Wait 10s + - 4th failure: Wait 30s + - 5th+ failures: Wait 60s + - Success resets counter +- **Interruptible sleep**: Ctrl+C works immediately during waits + - Checks `source.IsActive()` every second + - No more forced 60s waits before shutdown +- **ConsecutiveFailures counter**: Tracks problems across processing + - Incremented on layout failures, corruption, low confidence + - Reset on successful processing + - Enables intelligent escalation +- See V3_CHANGES.md for complete migration guide + +### v2.35 +- **Robust layout detection under varying light 
conditions** + - Changed Step 3 logic: instead of picking "2 largest boxes", find rightmost X in each half + - Splits detection area at X-center, finds max(rightX) for left half (SpO2) and right half (HR) + - Handles digit fragmentation: even if "95" breaks into "9" and "5" contours, still finds correct right edge + - Tracks Y range across all contours in each half for proper bounding +- **Raw frame archiving** + - Saves every processed frame to `raw_frames/raw_YYYYMMDD-NNNNN.png` (persistent directory) + - Enables offline testing and replay of detection algorithms + - Files are the rotated frames (after timestamp crop), exactly as fed to detection pipeline + - Not cleaned on restart - accumulates for historical analysis + +### v2.34 +- Every 4th frame processing (improved responsiveness) +- Simplified corruption handling (no immediate retry) +- Silent corruption detection (log only) +- Digit 3 detection offset: -8 pixels +- Invalid template: 2 patterns + +### Previous versions +- v2.33: Added hindsight validation for large deltas +- v2.32: Implemented low confidence retry with layout re-detection +- v2.31: Added invalid digit detection +- Earlier: Various contour detection and template matching improvements + +--- + +## Contact & Context + +**Development Environment:** macOS (local development and testing) +**Target Deployment:** Ubuntu 22.04 with Docker +**Home Assistant Version:** (check your setup) +**Camera:** (specify your RTSP camera model) + +**Project Location:** `/Users/johanjongsma/pulse-monitor/` + +--- + +## Quick Reference + +### Start monitoring +```bash +cd /Users/johanjongsma/pulse-monitor +./pulseox-monitor +``` + +### Stop and generate review +``` +Ctrl+C +# Opens review/review.html automatically +``` + +### Check logs +```bash +tail -f pulseox-monitor_*.log +``` + +### Approve training digit +```bash +# Move good digit image to training_digits/ +# Naming: {digit}_{variant}.png +``` + +### Mark as invalid/corruption +```bash +# Move bad pattern 
to training_digits/invalid/ +# Naming: descriptive name like "white_blob_2.png" +``` + +--- + +**Document Purpose:** This document serves as a comprehensive reference for understanding the project state, architecture, and reasoning. It should enable a new chat session (or developer) to quickly understand what's been built, why decisions were made, and how to continue development. + +**Last Verified Working:** November 17, 2025 (v3.57 - True raw frame saving) diff --git a/backups/backup_20251127/Systematic_Review_MRI_Brain_Sophia_REVISED.md b/backups/backup_20251127/Systematic_Review_MRI_Brain_Sophia_REVISED.md new file mode 100644 index 0000000..d000804 --- /dev/null +++ b/backups/backup_20251127/Systematic_Review_MRI_Brain_Sophia_REVISED.md @@ -0,0 +1,328 @@ +# Systematic Review of MRI Brain Report for Sophia + +**Patient:** Sophia Helena Jongsma +**Date of Birth:** January 1, 2017 +**Date of MRI:** May 6, 2022 +**Date of Review:** November 10, 2025 +**Original Radiologist:** Javier Quintana, MD + +--- + +## Author's Note + +This document was written by Johan Jongsma (Sophia's father) with the assistance of Claude Opus AI. While I am not a medical professional, I have strong analytical skills that I've applied to understanding my daughter's condition through systematic observation and reasoning. The MRI analysis referenced in this document was performed by Claude Opus AI based on the original radiology reports from May 6, 2022. + +--- + +## Original Radiologist's Report + +> IMPRESSION: In light of the history, imaging findings favored to reflect a degree of global hypoxic injury as discussed in detail. +> +> Indication: >72 hours post-cardiac arrest. Please evaluate Comparison Study Date: 5/6/2022 Technique: Multiplanar multisequence imaging of the brain was performed with and without contrast. +> +> Findings: There is moderate opacification of the paranasal sinuses.1 There is bilateral mastoid fluid.2 The brain is abnormal. 
Extensive areas of T2 prolongation are identified within the posterior upper cervical cord and brainstem3, midbrain, periaqueductal gray, thalami, basal ganglia4-7. Abnormal increased T2 signal with swelling is identified throughout much of the supratentorial cortex more noticeable in the parietal and occipital regions. This does affect the temporal and frontal lobes as well.8 Diffusion-weighted images are abnormal demonstrating restricted water motion along much of the cortical abnormalities near the gray-white matter junction. Restricted water motion within the basal ganglia and thalami is also noted to a degree. Involvement of some of the white matter is not entirely excluded as well.9-11 The lateral ventricles are symmetric and not particularly dilated and similar to this morning CT.12 The cavum septum pellucidum seen on the 5/2/2020 examination is however no longer visualized suggesting a degree of mass effect.13 However, suprasellar cisterns remain patent with probably similar appearance of the medial temporal lobes and cerebellar tonsils when accounting for the differences technique. The major intracranial flow-voids are grossly preserved. The cerebellar tonsils are rounded appearing and above the level of foramen magnum and again similar.14-16 The adenohypophysis is somewhat thin appearing which may be within anatomic range. Neurohypophysis is orthotopic. Corpus callosum is present. Subcutaneous edema is questioned within the face. As seen on the recent MRI, there does appear to be a degree of increased enhancement within the sulci bilaterally. However, given the underlying changes within the brain, this is favored to be related to congestion rather than leptomeningeal disease. Some degree of enhancement is questioned within the midbrain and periaqueductal gray as well as the inferior colliculus.17-23 + +**Note:** Superscript numbers have been added to the original report text for reference in the detailed analysis that follows. 
+ +--- + +## CRITICAL DISCOVERY #1: Temporal Evolution of Cisterna Magna Enlargement + +### Comparative CT Analysis: May 2, 2022 vs May 6, 2022 + +A systematic comparison of CT scans reveals the most important finding was missed entirely by the radiologist: **the massive cisterna magna enlargement developed progressively over the 4 days following the incident.** + +#### May 2, 2022 CT (8:31 PM - Day of Incident) + +**Observations:** +- **Normal-sized posterior fossa CSF spaces** +- Brainstem in normal anatomic position +- No evidence of enlarged cisterna magna +- Cerebellar hemispheres normal in size and position +- No anterior compression or flattening of brainstem + +#### May 6, 2022 CT (8:49 AM - Four Days Post-Incident) + +**Observations:** +- **Massive enlargement of posterior fossa CSF space (cisterna magna)** +- Brainstem compressed and displaced anteriorly +- Cerebellar hemispheres pushed laterally by CSF accumulation +- Clear evidence of progressive mass effect + +#### May 6, 2022 MRI (Same Day as Second CT) + +**Observations:** +- Confirms massive cisterna magna enlargement +- Brainstem mechanically compressed and flattened +- FLAIR sequences show BLACK signal (CSF), NOT tissue edema + +### Clinical Significance + +This temporal sequence establishes that the enlarged cisterna magna is: + +1. **ACQUIRED** - Not present immediately after the incident +2. **PROGRESSIVE** - Developed over a 4-day period +3. **STRUCTURAL** - Represents CSF accumulation, not hypoxic tissue injury +4. **MECHANICAL** - Causing brainstem compression through mass effect + +--- + +## CRITICAL DISCOVERY #2: Acquired Cerebellar Tonsil Herniation + +### Comparative Analysis: May 2, 2022 vs May 6, 2022 + +In addition to the cisterna magna enlargement, systematic comparison reveals **progressive descent of the cerebellar tonsils** between May 2 and May 6, 2022. 
+ +#### May 2, 2022 CT + +**Observations:** +- Cerebellar tonsils in **normal position** relative to foramen magnum +- Normal tapered configuration of tonsils +- No tonsillar herniation + +#### May 6, 2022 CT + +**Observations:** +- Cerebellar tonsils showing **rounded/blunted contour** +- Beginning descent toward foramen magnum + +#### May 6, 2022 MRI (SAG T1, slice 15/29) + +**Observations:** +- Cerebellar tonsils **descended/herniated** into or through the foramen magnum opening +- Acquired Chiari-like malformation +- Tonsils being compressed downward by the massive cisterna magna above + +### Clinical Significance of Tonsillar Herniation + +The progressive tonsillar descent is critical because: + +1. **Acquired, not congenital** - This is NOT a pre-existing Chiari malformation, but an acquired herniation caused by the enlarging cisterna magna pushing the cerebellum downward + +2. **CSF flow obstruction** - Herniated tonsils can block CSF circulation at the foramen magnum, potentially contributing to or perpetuating the cisterna magna enlargement + +3. **Direct neural compression** - Tonsils compress the brainstem and upper spinal cord, explaining: + - Autonomic dysfunction (breathing, heart rate abnormalities) + - Positional symptoms (inability to tolerate right-side positioning) + - Progressive neurological deterioration + +4. **Surgically addressable** - Posterior fossa decompression surgery can: + - Relieve tonsillar compression + - Restore CSF flow + - Address the cisterna magna + - Potentially reverse or improve symptoms + +### Clinical Context + +According to the patient's father, the medical team urgently performed imaging on May 6 because they were concerned about worsening pressure on the brainstem/spinal cord. 
The clinical team observed progressive neurological deterioration over those 4 days - which correlates directly with the progressive structural changes documented here: both the cisterna magna enlargement AND the tonsillar herniation were actively worsening. + +**These findings completely change the interpretation of Sophia's condition.** + +--- + +## Detailed Finding-by-Finding Analysis of May 6 MRI Report + +### Finding #1-2: Paranasal sinus opacification and mastoid fluid + +**Sequences reviewed:** AX T1 (slice 1/25) +**Observation:** Confirmed presence of fluid in sinuses and mastoid air cells +**Assessment:** Accurate but clinically irrelevant. Common in intubated ICU patients. + +### Finding #3: "T2 prolongation in posterior upper cervical cord and brainstem" + +**Sequences reviewed:** SAG T1 (15/29), AX T2 (7/25), AX FLAIR (7/25) +**Observation:** +- Massive CSF space posterior to brainstem (enlarged cisterna magna) +- Brainstem mechanically compressed/flattened anteriorly +- FLAIR shows BLACK signal confirming CSF, NOT tissue edema +- **May 2 CT shows this was NOT present initially** + +**Assessment:** **FUNDAMENTAL MISINTERPRETATION** - This is an acquired structural CSF enlargement that developed over 4 days, NOT hypoxic tissue injury as claimed. + +### Finding #4-7: Midbrain, periaqueductal gray, thalami, basal ganglia changes + +**Sequences reviewed:** AX T2 (10,12/25), AX FLAIR (8,9/25) +**Observation:** +- Some T2 hyperintensity confirmed in deep gray structures +- FLAIR confirms tissue changes (not just CSF effect) +- Pattern consistent with some injury but not extensive + +**Assessment:** Partially accurate, changes present but must be interpreted in context of progressive mechanical compression rather than purely hypoxic etiology. 
+ +### Finding #8: "Cortical swelling throughout supratentorial regions" + +**Sequences reviewed:** AX FLAIR (10,12,14,20,22,24/25) +**Observation:** +- Sulci remain OPEN and visible +- Gray-white differentiation preserved +- Minimal signal changes, NO significant swelling + +**Assessment:** **GROSSLY OVERSTATED** - Minimal changes present, no significant swelling evident. + +### Finding #9-11: "Restricted diffusion in cortex and deep gray matter" + +**Sequences reviewed:** T2 weighted trace B1000 (13,23,44/56), Average DCTENSOR B1000 (13,23/56), Fractional Anisotropy B1000 (13,23/56) +**Observation:** +- DWI images essentially NEGATIVE - no significant restricted diffusion +- FA maps show preserved white matter tract integrity + +**Assessment:** **DEMONSTRABLY FALSE** - No evidence of extensive restricted diffusion. This directly contradicts the claim of severe acute hypoxic-ischemic injury. + +### Finding #12: Ventricles symmetric, not dilated + +**Assessment:** Accurate, normal finding. + +### Finding #13: Cavum septum pellucidum comparison to "5/2/2020" + +**Critical Issue:** References exam from "5/2/2020" - TWO YEARS before accident occurred (5/2/2022). Presumably meant to reference May 2, 2022 CT. +**Assessment:** **MAJOR ERROR** - Wrong date by exactly 2 years, indicating severe inattention to detail and destroying report credibility. + +### Finding #14-16: "Cerebellar tonsils rounded appearing and above the level of foramen magnum" + +**Sequences reviewed:** SAG T1 (especially slice 15/29) +**Observation:** +- Radiologist noted "rounded" appearance and claimed tonsils "above foramen magnum" +- Direct comparison with May 2 CT shows cerebellar tonsils were in normal position on May 2 +- May 6 MRI shows cerebellar tonsils have **descended/herniated** into or through the foramen magnum +- **This is acquired tonsillar herniation, NOT a stable finding** + +**Assessment:** **CRITICAL MISINTERPRETATION** - The radiologist noted abnormal tonsil appearance but: +1. 
Failed to recognize this as an acquired change (comparing to May 2 imaging) +2. Mischaracterized the tonsil position as "above foramen magnum" when MRI shows herniation +3. Missed the clinical significance of progressive tonsillar descent +4. This is acquired Chiari-like malformation from progressive compression, not a pre-existing condition + +### Finding #17-23: Various minor/questioned findings + +**Assessment:** Multiple vague "questioned" findings showing uncertainty throughout report. + +--- + +## Summary of Errors + +| Finding | Radiologist's Claim | Actual Finding | Clinical Impact | +|---------|-------------------|----------------|-----------------| +| **Timeline recognition** | Not addressed | **Cisterna magna AND tonsils changed May 2-6** | **Critical missed findings** | +| **Posterior brainstem "T2 prolongation"** | Hypoxic injury | **Acquired enlarged cisterna magna** | **Fundamental misdiagnosis** | +| **Cerebellar tonsils** | "Rounded, above foramen magnum" | **Acquired herniation/descent** | **Missed progressive compression** | +| Deep gray matter changes | Present | Partially accurate | Some changes present | +| **Cortical "swelling"** | Extensive | **Minimal - Exaggerated** | Overstated severity | +| **Restricted diffusion** | Extensive | **FALSE - DWI negative** | **Major error contradicting hypoxic diagnosis** | +| **Date reference** | 5/2/2020 exam | **Wrong by 2 years** | **Credibility destroyed** | +| Structural findings | Various | Misinterpreted | Mechanical vs hypoxic confusion | + +--- + +## What the Report SHOULD Have Said + +> **IMPRESSION:** Serial imaging demonstrates progressive structural abnormalities in the posterior fossa developing between May 2 and May 6, 2022, including marked enlargement of the cisterna magna and acquired cerebellar tonsillar herniation, resulting in brainstem and spinal cord compression. 
The absence of significant restricted diffusion on DWI sequences argues strongly against extensive acute hypoxic-ischemic injury as the primary pathology. Mild T2 signal changes in deep gray structures may represent limited injury, but the dominant findings are progressive structural/mechanical pathology rather than hypoxic injury. +> +> **COMPARISON:** CT brain May 2, 2022 shows normal posterior fossa anatomy with normal cerebellar tonsil position. Current study (May 6, 2022) demonstrates dramatic interval development of: +> 1. Markedly enlarged cisterna magna +> 2. Acquired cerebellar tonsillar herniation +> +> **KEY FINDINGS:** +> +> 1. **Progressive cisterna magna enlargement** - Developed between May 2 and May 6, 2022 (documented on serial CT) +> 2. **Acquired cerebellar tonsillar herniation** - Tonsils descended from normal position (May 2) to herniation through foramen magnum (May 6), creating Chiari-like malformation +> 3. **Acquired brainstem compression** - Anterior displacement and flattening of brainstem due to posterior CSF accumulation +> 4. **CSF flow obstruction** - Herniated tonsils blocking CSF circulation at foramen magnum level +> 5. Mild T2 hyperintensity in bilateral thalami and basal ganglia without corresponding restricted diffusion +> 6. **No significant cortical edema or swelling** - sulci remain visible, gray-white differentiation preserved +> 7. **No evidence of extensive acute hypoxic-ischemic injury** on diffusion-weighted imaging +> 8. Preserved white matter tract integrity on fractional anisotropy maps +> +> **CLINICAL CORRELATION:** +> The progressive nature of both the cisterna magna enlargement and tonsillar herniation over 4 days, accompanied by clinical deterioration prompting urgent imaging, suggests an active structural process rather than immediate traumatic or hypoxic injury. 
The mechanism may represent: +> - Post-traumatic arachnoid disruption with progressive CSF accumulation +> - Impaired CSF circulation/resorption exacerbated by tonsillar herniation +> - Progressive posterior fossa pressure changes causing secondary tonsillar descent +> +> **RECOMMENDATION:** URGENT neurosurgical consultation for evaluation of: +> 1. Acquired cisterna magna enlargement +> 2. Acquired cerebellar tonsillar herniation (Chiari-like malformation) +> 3. Brainstem and spinal cord compression +> +> The progressive nature of these findings over 4 days with accompanying clinical deterioration suggests an evolving process requiring prompt neurosurgical assessment. Posterior fossa decompression may be indicated to: +> - Relieve tonsillar herniation +> - Restore CSF flow +> - Decompress brainstem +> - Address cisterna magna +> - Potentially reverse or improve neurological status + +--- + +## Conclusion + +This systematic review reveals fundamental errors in the original radiological interpretation that completely missed TWO critical progressive findings: + +### Primary Findings: Progressive Acquired Structural Abnormalities + +**1. The enlarged cisterna magna developed between May 2 and May 6, 2022.** +**2. Cerebellar tonsil herniation developed between May 2 and May 6, 2022.** + +This is documented by direct comparison of serial imaging showing: +- Normal posterior fossa anatomy on May 2, 2022 (day of incident) +- Normal cerebellar tonsil position on May 2, 2022 +- Massive cisterna magna enlargement by May 6, 2022 (4 days later) +- Cerebellar tonsillar herniation by May 6, 2022 (4 days later) + +This temporal evolution establishes that we are dealing with **acquired, progressive structural processes** - not pre-existing anatomical variants, and not immediate traumatic injury. + +### Critical Errors in Original Report + +1. 
**Failure to recognize temporal evolution:** The radiologist had access to the May 2 CT for comparison but failed to identify or comment on the dramatic interval development of BOTH the cisterna magna enlargement AND the cerebellar tonsillar herniation. + +2. **Misidentification of pathology:** The most striking findings - a progressively enlarging cisterna magna causing brainstem compression and progressive tonsillar herniation - were misinterpreted as "T2 prolongation" from hypoxic injury and static "rounded" tonsils. The FLAIR sequences clearly show BLACK signal (CSF), not the bright signal expected from tissue edema. + +3. **False diffusion findings:** The report claimed "extensive restricted diffusion" throughout the brain. Direct review of DWI sequences shows they are essentially negative. This directly contradicts the diagnosis of severe acute hypoxic-ischemic injury. + +4. **Basic factual errors:** The report references a comparison study from "5/2/2020" - exactly two years before the actual incident. This appears to be a typo for 5/2/2022, but such an error in a critical report indicates severe inattention to detail. + +5. **Exaggeration of findings:** Claimed "extensive swelling" where minimal changes exist. + +6. **Mischaracterization of tonsillar position:** Described tonsils as "above the level of foramen magnum" when imaging shows herniation into or through the foramen magnum. + +### Clinical Implications + +**Sophia has acquired, progressive posterior fossa pathology consisting of:** +1. **Cisterna magna enlargement** causing brainstem compression +2. 
**Cerebellar tonsillar herniation** (acquired Chiari-like malformation) causing: + - CSF flow obstruction at foramen magnum + - Direct brainstem and upper spinal cord compression + - Autonomic dysfunction + - Positional symptoms + +**These conditions:** +- Developed progressively over 4 days following the incident +- Represent structural/mechanical problems rather than purely hypoxic injury +- May be amenable to neurosurgical intervention (posterior fossa decompression) +- Have completely different prognostic implications than diffuse hypoxic-ischemic injury + +**The absence of significant restricted diffusion on DWI sequences argues strongly against extensive acute hypoxic-ischemic injury as the primary diagnosis.** The radiologist's conclusion of "global hypoxic injury" is not supported by the imaging findings and completely misses the progressive structural pathology that is the dominant finding. + +### Mechanism of Injury: Implications + +The progressive development of both cisterna magna enlargement and tonsillar herniation between May 2 and May 6 suggests several possible mechanisms: + +1. **Post-traumatic CSF accumulation** - Disruption of arachnoid membranes or posterior fossa structures allowing CSF to accumulate, with increasing pressure causing downward cerebellar displacement and tonsillar herniation + +2. **Impaired CSF circulation** - Damage to CSF resorption pathways leading to focal accumulation, with subsequent mass effect causing tonsillar descent and further CSF flow obstruction, creating a progressive cycle + +3. **Combined mechanism** - Initial CSF accumulation leading to tonsillar herniation, which then further impairs CSF flow and perpetuates the enlargement + +The key point is that these are **progressive, acquired processes that may be reversible or amenable to treatment** - fundamentally different from the fixed, irreversible hypoxic injury diagnosis that was given. 
+ +**This analysis fundamentally changes the understanding of Sophia's condition and has profound implications for both prognosis and treatment options, including the critical potential role for neurosurgical intervention to address:** +- Mechanical brainstem compression from cisterna magna +- Cerebellar tonsillar herniation and CSF flow obstruction +- Progressive posterior fossa pathology that was actively worsening and prompted urgent clinical concern + +The medical team's urgent imaging on May 6 due to concerns about "worsening pressure" was clinically appropriate - they were observing the effects of these progressive structural changes. The tragedy is that the radiologist completely missed documenting this progressive pathology, instead attributing everything to irreversible hypoxic injury. + +--- + +**Document prepared by:** Johan Jongsma +**Date:** November 10, 2025 +**Assistance:** Claude Opus AI +**Purpose:** Medical consultation with neurology specialist diff --git a/backups/backup_20251127/build.sh b/backups/backup_20251127/build.sh new file mode 100755 index 0000000..fe446da --- /dev/null +++ b/backups/backup_20251127/build.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# Build script for pulseox-monitor v3.60 + +echo "=== Building PulseOx Monitor v3.60 (Unified Detection) ===" +echo "" + +# List of all source files needed +FILES="pulseox-monitor.go types.go helpers.go frame_source.go processor.go validators.go ocr.go layout_detection.go detection.go config.go homeassistant.go html_report.go timestamp_ocr.go" + +# Check if all files exist +MISSING=0 +for file in $FILES; do + if [ ! -f "$file" ]; then + echo "āŒ Missing file: $file" + MISSING=1 + fi +done + +if [ $MISSING -eq 1 ]; then + echo "" + echo "Error: Missing source files. Please ensure all files are present." + exit 1 +fi + +echo "āœ“ All source files present" +echo "" +echo "Compiling..." + +# Build +go build -o pulseox-monitor $FILES + +if [ $? 
-eq 0 ]; then + echo "" + echo "āœ“ Build successful: pulseox-monitor" + echo "" + echo "Test with:" + echo " ./pulseox-monitor raw_frames/raw_20251028-00123.png # Single frame" + echo " ./pulseox-monitor # Live stream" + echo "" +else + echo "" + echo "āŒ Build failed" + exit 1 +fi diff --git a/backups/backup_20251127/config.go b/backups/backup_20251127/config.go new file mode 100644 index 0000000..4c74768 --- /dev/null +++ b/backups/backup_20251127/config.go @@ -0,0 +1,41 @@ +package main + +import ( + "fmt" + "os" + + "gopkg.in/yaml.v3" +) + +type Config struct { + Camera struct { + RTSPURL string `yaml:"rtsp_url"` + } `yaml:"camera"` + HomeAssistant struct { + URL string `yaml:"url"` + Token string `yaml:"token"` + } `yaml:"home_assistant"` + Processing struct { + SampleInterval int `yaml:"sample_interval"` + ChangeThresholdPercent int `yaml:"change_threshold_percent"` + MaxDriftSeconds int `yaml:"max_drift_seconds"` + } `yaml:"processing"` + Logging struct { + Level string `yaml:"level"` + File string `yaml:"file"` + } `yaml:"logging"` +} + +func LoadConfig(filename string) (*Config, error) { + data, err := os.ReadFile(filename) + if err != nil { + return nil, fmt.Errorf("failed to read config file: %w", err) + } + + var config Config + if err := yaml.Unmarshal(data, &config); err != nil { + return nil, fmt.Errorf("failed to parse config file: %w", err) + } + + return &config, nil +} diff --git a/backups/backup_20251127/config.yaml b/backups/backup_20251127/config.yaml new file mode 100644 index 0000000..edfe6fc --- /dev/null +++ b/backups/backup_20251127/config.yaml @@ -0,0 +1,25 @@ +# Pulse Oximeter Monitor Configuration + +camera: + # Tapo C110 RTSP URL (pulse ox monitoring camera) + rtsp_url: "rtsp://tapohass:!!Helder06@192.168.2.183:554/stream1" + +home_assistant: + # TS140 server IP where HASS is hosted + url: "http://192.168.1.252:8123" + # Long-lived access token for pulse-monitor + token: 
"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiIzOTMxMTA4MjczYzI0NDU1YjIzOGJlZWE0Y2NkM2I1OCIsImlhdCI6MTc2MTExNTQxNywiZXhwIjoyMDc2NDc1NDE3fQ.URFS4M0rX78rW27gQuBX-PyrPYMLlGujF16jIBHXYOw" + +processing: + # How often to capture and process frames (seconds) + sample_interval: 1 + # Reject readings that change more than this percent + change_threshold_percent: 5 + # Maximum allowed drift INCREASE in seconds (detects camera lag/freeze) + # Camera can be 10s behind server consistently - that's fine + # But if drift increases by more than this, frame is stale + max_drift_seconds: 3 + +logging: + level: "info" # debug, info, warn, error + file: "./logs/pulse-monitor.log" \ No newline at end of file diff --git a/backups/backup_20251127/create_backup.sh b/backups/backup_20251127/create_backup.sh new file mode 100755 index 0000000..46b1f41 --- /dev/null +++ b/backups/backup_20251127/create_backup.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# Backup script for pulse-monitor before refactoring +# Created: 2025-10-30 + +BACKUP_DIR="backups/backup_20251030_043342" +mkdir -p "$BACKUP_DIR" + +# Copy all source files +cp pulse-monitor.go "$BACKUP_DIR/" +cp ocr.go "$BACKUP_DIR/" +cp layout_detection.go "$BACKUP_DIR/" +cp normalize.go "$BACKUP_DIR/" +cp config.go "$BACKUP_DIR/" +cp homeassistant.go "$BACKUP_DIR/" +cp html_report.go "$BACKUP_DIR/" +cp PROJECT_STATE.md "$BACKUP_DIR/" +cp config.yaml "$BACKUP_DIR/" + +echo "āœ“ Backup completed to $BACKUP_DIR" +ls -lh "$BACKUP_DIR" diff --git a/backups/backup_20251127/detection.go b/backups/backup_20251127/detection.go new file mode 100644 index 0000000..2d95760 --- /dev/null +++ b/backups/backup_20251127/detection.go @@ -0,0 +1,1061 @@ +package main + +import ( + "fmt" + "image" + "image/color" + "math" + "sort" + + "gocv.io/x/gocv" +) + +// DEBUG flag is declared in types.go + +// Band represents a vertical band of contours +type Band struct { + minX int + maxX int + minY int + maxY int +} + +// DetectBands finds and groups contours into 
vertical bands +// Only returns bands with height >= minHeight pixels +func DetectBands(binary gocv.Mat, minHeight int) []Band { + type Box struct { + minX int + maxX int + minY int + maxY int + } + + // Find contours + contours := gocv.FindContours(binary, gocv.RetrievalExternal, gocv.ChainApproxSimple) + + // Get all bounding boxes + boxes := []Box{} + for i := 0; i < contours.Size(); i++ { + rect := gocv.BoundingRect(contours.At(i)) + boxes = append(boxes, Box{ + minX: rect.Min.X, + maxX: rect.Max.X, + minY: rect.Min.Y, + maxY: rect.Max.Y, + }) + } + + // Sort boxes by minY + sort.Slice(boxes, func(i, j int) bool { + return boxes[i].minY < boxes[j].minY + }) + + // Group overlapping/adjacent boxes into vertical bands + bands := []Band{} + + for _, box := range boxes { + // Skip boxes shorter than minHeight + height := box.maxY - box.minY + if height < minHeight { + continue + } + + if len(bands) == 0 { + // First band + bands = append(bands, Band{ + minX: box.minX, + maxX: box.maxX, + minY: box.minY, + maxY: box.maxY, + }) + } else { + lastBand := &bands[len(bands)-1] + + // Check if this box overlaps or is close to last band (within 5px) + if box.minY <= lastBand.maxY+5 { + // Merge: extend the band in both X and Y + if box.minX < lastBand.minX { + lastBand.minX = box.minX + } + if box.maxX > lastBand.maxX { + lastBand.maxX = box.maxX + } + if box.maxY > lastBand.maxY { + lastBand.maxY = box.maxY + } + } else { + // New band + bands = append(bands, Band{ + minX: box.minX, + maxX: box.maxX, + minY: box.minY, + maxY: box.maxY, + }) + } + } + } + + return bands +} + +// findBaseline analyzes the baseline in the graph band +// Returns left point, right point, width, number of sections, and error +func findBaseline(binary gocv.Mat, graphBand Band) (*image.Point, *image.Point, int, int, error) { + // Extract just the graph band region + graphRegion := binary.Region(image.Rect(0, graphBand.minY, binary.Cols(), graphBand.maxY+1)) + + // Find all contours in graph 
region + contours := gocv.FindContours(graphRegion, gocv.RetrievalExternal, gocv.ChainApproxSimple) + + // Get sections (contours) with width >= 5px + type Section struct { + minX int + maxX int + minY int + maxY int + } + sections := []Section{} + + for i := 0; i < contours.Size(); i++ { + rect := gocv.BoundingRect(contours.At(i)) + width := rect.Max.X - rect.Min.X + height := rect.Max.Y - rect.Min.Y + // Filter: width >= 5px AND height >= 80px + if width >= 5 && height >= 80 { + sections = append(sections, Section{ + minX: rect.Min.X, + maxX: rect.Max.X, + minY: rect.Min.Y + graphBand.minY, // Adjust to full image coordinates + maxY: rect.Max.Y + graphBand.minY, + }) + } + } + + // Check if we have exactly 2 sections + if len(sections) != 2 { + return nil, nil, 0, len(sections), fmt.Errorf("found %d sections (need exactly 2)", len(sections)) + } + + // Sort sections by X position (left to right) + sort.Slice(sections, func(i, j int) bool { + return sections[i].minX < sections[j].minX + }) + + // Horizontal width (left edge of left section to right edge of right section) + horizontalWidth := sections[1].maxX - sections[0].minX + + // Calculate scale factor for offset (target 860px) + scale := 860.0 / float64(horizontalWidth) + offset := int(3.0 * scale) + if offset < 3 { + offset = 3 + } + + // Find rotation points + // Left section: lowest white pixel offset from left edge + leftSection := sections[0] + leftX := leftSection.minX + offset + leftPoint := findLowestWhitePixel(binary, leftX, graphBand.minY, graphBand.maxY) + + // Right section: lowest white pixel offset from right edge + rightSection := sections[1] + rightX := rightSection.maxX - offset + rightPoint := findLowestWhitePixel(binary, rightX, graphBand.minY, graphBand.maxY) + + if leftPoint == nil || rightPoint == nil { + return nil, nil, 0, len(sections), fmt.Errorf("could not find rotation endpoints") + } + + return leftPoint, rightPoint, horizontalWidth, len(sections), nil +} + +// 
findLowestWhitePixel finds the lowest (max Y) white pixel at given X within Y range +func findLowestWhitePixel(binary gocv.Mat, x int, minY int, maxY int) *image.Point { + lowestY := -1 + for y := minY; y <= maxY; y++ { + if binary.GetUCharAt(y, x) == 255 { + lowestY = y + } + } + if lowestY == -1 { + return nil + } + return &image.Point{X: x, Y: lowestY} +} + +// DetectionResult contains rotation and width detection results +type DetectionResult struct { + Rotation float64 + Width int + ScaleFactor float64 // Scale factor to apply (860 / width) + SpO2 image.Rectangle // SpO2 display area in SCALED, ROTATED coordinates + HR image.Rectangle // HR display area in SCALED, ROTATED coordinates + Success bool +} + +// PreprocessFrame takes a raw frame and prepares it for detection +// Returns the processed binary image +func PreprocessFrame(frame gocv.Mat) gocv.Mat { + // Crop top 68 pixels (timestamp) + cropped := frame.Region(image.Rect(0, 68, frame.Cols(), frame.Rows())) + + // Rotate 90° clockwise + rotated := gocv.NewMat() + gocv.Rotate(cropped, &rotated, gocv.Rotate90Clockwise) + + // Convert to grayscale + gray := gocv.NewMat() + gocv.CvtColor(rotated, &gray, gocv.ColorBGRToGray) + rotated.Close() + + // Threshold to binary at 240 (matching main pipeline) + binary := gocv.NewMat() + gocv.Threshold(gray, &binary, 240, 255, gocv.ThresholdBinary) + gray.Close() + + return binary +} + +// DetectRotationAndWidth analyzes a binary frame and returns rotation angle, width, and display areas +func DetectRotationAndWidth(binary gocv.Mat) DetectionResult { + // ========== PHASE 1: Detect rotation & scale from baseline ========== + + // Detect bands (using 80px minimum height for original scale) + bands := DetectBands(binary, 80) + + // Check if we have enough bands + if len(bands) < 2 { + if DEBUG { + fmt.Printf("DEBUG: Detection failed - only %d bands found (need at least 2)\n", len(bands)) + } + return DetectionResult{Success: false} + } + + // Find TALLEST band 
(graph/waveform display) + tallestIdx := 0 + tallestHeight := 0 + for i, band := range bands { + height := band.maxY - band.minY + if height > tallestHeight { + tallestHeight = height + tallestIdx = i + } + } + + // Check if there's a band after the tallest (for digits) + if tallestIdx >= len(bands)-1 { + if DEBUG { + fmt.Printf("DEBUG: Detection failed - tallest band is last (idx=%d, total=%d)\n", tallestIdx, len(bands)) + } + return DetectionResult{Success: false} + } + + graphBand := bands[tallestIdx] // Tallest = graph/waveform + + // Find baseline + leftPoint, rightPoint, width, sections, err := findBaseline(binary, graphBand) + if err != nil || sections != 2 { + if DEBUG { + if err != nil { + fmt.Printf("DEBUG: Detection failed - baseline error: %v\n", err) + } else { + fmt.Printf("DEBUG: Detection failed - found %d sections (need exactly 2)\n", sections) + } + } + return DetectionResult{Success: false} + } + + // Calculate rotation angle from baseline points + deltaY := float64(rightPoint.Y - leftPoint.Y) + deltaX := float64(rightPoint.X - leftPoint.X) + rotation := math.Atan2(deltaY, deltaX) * 180 / math.Pi + + // DEBUG: Visualize the baseline points + if DEBUG { + viz := gocv.NewMat() + gocv.CvtColor(binary, &viz, gocv.ColorGrayToBGR) + + // Draw left point (green) + gocv.Circle(&viz, *leftPoint, 10, color.RGBA{0, 255, 0, 255}, -1) + // Draw right point (red) + gocv.Circle(&viz, *rightPoint, 10, color.RGBA{0, 0, 255, 255}, -1) + // Draw line between them (yellow) + gocv.Line(&viz, *leftPoint, *rightPoint, color.RGBA{0, 255, 255, 255}, 2) + // Draw graph band boundaries + gocv.Line(&viz, image.Pt(0, graphBand.minY), image.Pt(viz.Cols(), graphBand.minY), color.RGBA{255, 0, 255, 255}, 1) + gocv.Line(&viz, image.Pt(0, graphBand.maxY), image.Pt(viz.Cols(), graphBand.maxY), color.RGBA{255, 0, 255, 255}, 1) + + gocv.IMWrite("debug_baseline_points.png", viz) + viz.Close() + fmt.Println("DEBUG: Saved baseline points visualization to debug_baseline_points.png") + } 
+ + if DEBUG { + fmt.Printf("DEBUG: Baseline points: Left(%d,%d) Right(%d,%d)\n", + leftPoint.X, leftPoint.Y, rightPoint.X, rightPoint.Y) + fmt.Printf("DEBUG: Delta Y=%d, Delta X=%d, Rotation=%.3f°\n", + rightPoint.Y-leftPoint.Y, rightPoint.X-leftPoint.X, rotation) + } + + // Calculate scale factor (860 / detected width) + scaleFactor := 860.0 / float64(width) + + if DEBUG { + fmt.Printf("DEBUG: Width=%dpx, Scale factor=%.3f (baseline will become 860px)\n", + width, scaleFactor) + } + + // ========== PHASE 2: Apply transforms ========== + + // Rotate the image to correct tilt + rotated := RotateImage(binary, rotation) + defer rotated.Close() + + // Scale to 860px baseline width + scaled := ScaleByFactor(rotated, scaleFactor) + defer scaled.Close() + + if DEBUG { + fmt.Printf("DEBUG: Transformed image size: %dx%d\n", scaled.Cols(), scaled.Rows()) + } + + // ========== PHASE 3: Detect display areas on transformed image ========== + + // Re-detect bands on scaled image (adjust minHeight for scale) + scaledMinHeight := int(80.0 * scaleFactor) + scaledBands := DetectBands(scaled, scaledMinHeight) + + if len(scaledBands) < 2 { + if DEBUG { + fmt.Printf("DEBUG: Detection failed - only %d bands found on scaled image\n", len(scaledBands)) + } + return DetectionResult{Success: false} + } + + // Find tallest band again on scaled image + scaledTallestIdx := 0 + scaledTallestHeight := 0 + for i, band := range scaledBands { + height := band.maxY - band.minY + if height > scaledTallestHeight { + scaledTallestHeight = height + scaledTallestIdx = i + } + } + + // Check if there's a digit band after the graph band + if scaledTallestIdx >= len(scaledBands)-1 { + if DEBUG { + fmt.Printf("DEBUG: Detection failed - no digit band after graph on scaled image\n") + } + return DetectionResult{Success: false} + } + + digitBand := scaledBands[scaledTallestIdx+1] + + if DEBUG { + fmt.Printf("DEBUG: Digit band on scaled image: Y[%d-%d], Height=%dpx\n", + digitBand.minY, digitBand.maxY, 
digitBand.maxY-digitBand.minY) + } + + // Extract digit band region + digitRegion := ExtractBandRegion(scaled, digitBand) + defer digitRegion.Close() + + // Find digit boxes + digitBoxes := FindDigitBoxes(digitRegion, scaledMinHeight) + + if len(digitBoxes) < 2 { + if DEBUG { + fmt.Printf("DEBUG: Detection failed - only %d digit boxes found (need at least 2)\n", len(digitBoxes)) + } + return DetectionResult{Success: false} + } + + // Merge digit boxes into SpO2/HR areas + displayAreas := MergeDigitBoxesIntoDisplays(digitRegion, digitBoxes) + + // CRITICAL: Expand boxes to CUT_WIDTH (280px) using right edge as anchor + // OCR code expects exactly 280px wide boxes + const CUT_WIDTH = 280 + + // SpO2: keep right edge, expand left + spo2RightX := displayAreas.SpO2.Max.X + spo2LeftX := spo2RightX - CUT_WIDTH + if spo2LeftX < 0 { + spo2LeftX = 0 + } + + // HR: keep right edge, expand left + hrRightX := displayAreas.HR.Max.X + hrLeftX := hrRightX - CUT_WIDTH + if hrLeftX < 0 { + hrLeftX = 0 + } + + if DEBUG { + fmt.Printf("DEBUG: Expanded SpO2 from width %d to %d (CUT_WIDTH)\n", + displayAreas.SpO2.Dx(), CUT_WIDTH) + fmt.Printf("DEBUG: Expanded HR from width %d to %d (CUT_WIDTH)\n", + displayAreas.HR.Dx(), CUT_WIDTH) + } + + // Convert coordinates from digit-region space to full-image space + // (add digitBand.minY offset to Y coordinates) + // Use expanded X coordinates (CUT_WIDTH) + spo2Rect := image.Rect( + spo2LeftX, + displayAreas.SpO2.Min.Y + digitBand.minY, + spo2RightX, + displayAreas.SpO2.Max.Y + digitBand.minY, + ) + hrRect := image.Rect( + hrLeftX, + displayAreas.HR.Min.Y + digitBand.minY, + hrRightX, + displayAreas.HR.Max.Y + digitBand.minY, + ) + + if DEBUG { + fmt.Printf("DEBUG: SpO2 display area (full coords): X[%d-%d] Y[%d-%d]\n", + spo2Rect.Min.X, spo2Rect.Max.X, spo2Rect.Min.Y, spo2Rect.Max.Y) + fmt.Printf("DEBUG: HR display area (full coords): X[%d-%d] Y[%d-%d]\n", + hrRect.Min.X, hrRect.Max.X, hrRect.Min.Y, hrRect.Max.Y) + } + + return DetectionResult{ 
+ Rotation: rotation, + Width: width, + ScaleFactor: scaleFactor, + SpO2: spo2Rect, + HR: hrRect, + Success: true, + } +} + +// VisualizeBands draws bands on an image with labels (only in DEBUG mode) +func VisualizeBands(binary gocv.Mat, bands []Band, tallestIdx int, filename string) { + if !DEBUG { + return + } + + viz := gocv.NewMat() + gocv.CvtColor(binary, &viz, gocv.ColorGrayToBGR) + defer viz.Close() + + red := color.RGBA{R: 255, G: 0, B: 0, A: 255} + green := color.RGBA{R: 0, G: 255, B: 0, A: 255} + blue := color.RGBA{R: 0, G: 0, B: 255, A: 255} + yellow := color.RGBA{R: 255, G: 255, B: 0, A: 255} + + // Draw all bands + for i, band := range bands { + bandColor := red + if i == tallestIdx { + bandColor = green // Graph band in green + } else if tallestIdx < len(bands)-1 && i == tallestIdx+1 { + bandColor = blue // Numbers band in blue + } + + rect := image.Rect(0, band.minY, viz.Cols(), band.maxY) + gocv.Rectangle(&viz, rect, bandColor, 2) + + label := fmt.Sprintf("Band #%d", i+1) + gocv.PutText(&viz, label, image.Pt(10, band.minY+20), + gocv.FontHersheyPlain, 1.5, yellow, 2) + } + + gocv.IMWrite(filename, viz) + fmt.Printf("DEBUG: Saved visualization to %s\n", filename) +} + +// VisualizeDigitBand highlights the digit band specifically +func VisualizeDigitBand(binary gocv.Mat, bands []Band, tallestIdx int, filename string) { + if !DEBUG || tallestIdx >= len(bands)-1 { + return + } + + viz := gocv.NewMat() + gocv.CvtColor(binary, &viz, gocv.ColorGrayToBGR) + defer viz.Close() + + red := color.RGBA{R: 255, G: 0, B: 0, A: 255} + green := color.RGBA{R: 0, G: 255, B: 0, A: 255} + blue := color.RGBA{R: 0, G: 0, B: 255, A: 255} + yellow := color.RGBA{R: 255, G: 255, B: 0, A: 255} + cyan := color.RGBA{R: 0, G: 255, B: 255, A: 255} + + // Draw all bands with specific highlighting + for i, band := range bands { + bandColor := red + lineThickness := 1 + labelText := fmt.Sprintf("Band #%d", i+1) + + if i == tallestIdx { + bandColor = green + labelText = "GRAPH BAND" + 
} else if i == tallestIdx+1 { + // DIGIT BAND - highlight strongly + bandColor = cyan + lineThickness = 3 + labelText = fmt.Sprintf("DIGIT BAND [Y: %d-%d]", band.minY, band.maxY) + } + + rect := image.Rect(0, band.minY, viz.Cols(), band.maxY) + gocv.Rectangle(&viz, rect, bandColor, lineThickness) + + // Add label + labelColor := yellow + if i == tallestIdx+1 { + labelColor = cyan + } + gocv.PutText(&viz, labelText, image.Pt(10, band.minY+20), + gocv.FontHersheyPlain, 1.5, labelColor, 2) + } + + // Draw additional markers for digit band + if tallestIdx < len(bands)-1 { + digitBand := bands[tallestIdx+1] + + // Draw vertical lines at digit band boundaries + for x := 50; x < viz.Cols(); x += 100 { + gocv.Line(&viz, + image.Pt(x, digitBand.minY), + image.Pt(x, digitBand.maxY), + blue, 1) + } + + // Add dimension text + height := digitBand.maxY - digitBand.minY + dimText := fmt.Sprintf("Height: %dpx, Width: 860px", height) + gocv.PutText(&viz, dimText, image.Pt(10, digitBand.maxY+30), + gocv.FontHersheyPlain, 1.2, cyan, 2) + } + + gocv.IMWrite(filename, viz) + fmt.Printf("DEBUG: Saved digit band visualization to %s\n", filename) +} + +// ValidateBaselineTwoSegments checks if the baseline consists of two separate segments +// by counting black-to-white transitions along the baseline (should be exactly 1) +func ValidateBaselineTwoSegments(binary gocv.Mat, leftPoint, rightPoint *image.Point) bool { + // Calculate slope for Y interpolation along the baseline + deltaY := float64(rightPoint.Y - leftPoint.Y) + deltaX := float64(rightPoint.X - leftPoint.X) + slope := deltaY / deltaX + + if DEBUG { + fmt.Printf("DEBUG: Validating baseline segments (slope=%.4f) from X=%d to X=%d\n", + slope, leftPoint.X, rightPoint.X) + } + + // Count black-to-white transitions + transitions := 0 + lastWasWhite := false + + for x := leftPoint.X; x <= rightPoint.X; x++ { + // Interpolate Y position along the baseline, then go 2px up (away from thick edge) + y := leftPoint.Y + 
int(float64(x-leftPoint.X)*slope) - 2 + isWhite := binary.GetUCharAt(y, x) == 255 + + // Detect black-to-white transition + if isWhite && !lastWasWhite { + transitions++ + } + lastWasWhite = isWhite + } + + if DEBUG { + fmt.Printf("DEBUG: Found %d black-to-white transitions\n", transitions) + } + + // We need exactly 1 black-to-white transition for 2 segments + // (entering the second segment after the gap) + if transitions != 1 { + if DEBUG { + fmt.Printf("DEBUG: INVALID - Need exactly 1 transition, found %d\n", transitions) + } + return false + } + + if DEBUG { + fmt.Println("DEBUG: VALID - Baseline has two distinct segments") + } + return true +} + +// RotateImage rotates an image by the given angle in degrees +// The angle is the detected rotation that needs to be corrected +func RotateImage(img gocv.Mat, angleDegrees float64) gocv.Mat { + // Get image center + center := image.Point{ + X: img.Cols() / 2, + Y: img.Rows() / 2, + } + + // Apply rotation to correct the tilt + // NOTE: If image rotates wrong direction, change to: -angleDegrees + rotMat := gocv.GetRotationMatrix2D(center, angleDegrees, 1.0) + defer rotMat.Close() + + // Apply rotation + rotated := gocv.NewMat() + gocv.WarpAffine(img, &rotated, rotMat, image.Point{X: img.Cols(), Y: img.Rows()}) + + if DEBUG { + fmt.Printf("DEBUG: Applied rotation: %.3f°\n", angleDegrees) + } + + return rotated +} + +// VisualizeSimpleBands draws just the graph and digit bands clearly +func VisualizeSimpleBands(binary gocv.Mat, graphBand, digitBand Band, filename string) { + if !DEBUG { + return + } + + viz := gocv.NewMat() + gocv.CvtColor(binary, &viz, gocv.ColorGrayToBGR) + defer viz.Close() + + green := color.RGBA{R: 0, G: 255, B: 0, A: 255} + cyan := color.RGBA{R: 0, G: 255, B: 255, A: 255} + + // Draw graph band in green + graphRect := image.Rect(0, graphBand.minY, viz.Cols(), graphBand.maxY) + gocv.Rectangle(&viz, graphRect, green, 2) + gocv.PutText(&viz, "GRAPH BAND", image.Pt(10, graphBand.minY+20), + 
gocv.FontHersheyPlain, 1.5, green, 2) + + // Draw digit band in cyan with thick border + digitRect := image.Rect(0, digitBand.minY, viz.Cols(), digitBand.maxY) + gocv.Rectangle(&viz, digitRect, cyan, 3) + label := fmt.Sprintf("DIGIT BAND [Y: %d-%d, H: %dpx]", + digitBand.minY, digitBand.maxY, digitBand.maxY-digitBand.minY) + gocv.PutText(&viz, label, image.Pt(10, digitBand.minY+20), + gocv.FontHersheyPlain, 1.5, cyan, 2) + + gocv.IMWrite(filename, viz) + fmt.Printf("DEBUG: Saved 2-band visualization to %s\n", filename) +} + +// ExtractBandRegion extracts a specific band region from an image +func ExtractBandRegion(img gocv.Mat, band Band) gocv.Mat { + // Create a region of interest (ROI) for the band + rect := image.Rect(0, band.minY, img.Cols(), band.maxY) + region := img.Region(rect) + + // Clone the region to create an independent Mat + extracted := gocv.NewMat() + region.CopyTo(&extracted) + + if DEBUG { + fmt.Printf("DEBUG: Extracted band region [%d-%d], size: %dx%d\n", + band.minY, band.maxY, extracted.Cols(), extracted.Rows()) + } + + return extracted +} + +// FindDigitBoxes finds individual digit contours within a region +// Returns bounding rectangles for digits larger than minSize +func FindDigitBoxes(digitRegion gocv.Mat, minSize int) []image.Rectangle { + contours := gocv.FindContours(digitRegion, gocv.RetrievalExternal, gocv.ChainApproxSimple) + + var digitBoxes []image.Rectangle + for i := 0; i < contours.Size(); i++ { + rect := gocv.BoundingRect(contours.At(i)) + height := rect.Max.Y - rect.Min.Y + + // Filter by minimum size - height only + if height >= minSize { + digitBoxes = append(digitBoxes, rect) + } + } + + // Sort boxes by X position (left to right) + sort.Slice(digitBoxes, func(i, j int) bool { + return digitBoxes[i].Min.X < digitBoxes[j].Min.X + }) + + if DEBUG { + fmt.Printf("DEBUG: Found %d digit contours (H>=%dpx)\n", len(digitBoxes), minSize) + } + + return digitBoxes +} + +// VisualizeDigitBoxes draws boxes around detected digits with 
numbers +func VisualizeDigitBoxes(digitRegion gocv.Mat, boxes []image.Rectangle, yOffset int, filename string) { + if !DEBUG { + return + } + + // Create visualization + viz := gocv.NewMat() + gocv.CvtColor(digitRegion, &viz, gocv.ColorGrayToBGR) + defer viz.Close() + + // Colors for digit boxes + red := color.RGBA{R: 255, G: 0, B: 0, A: 255} + yellow := color.RGBA{R: 255, G: 255, B: 0, A: 255} + green := color.RGBA{R: 0, G: 255, B: 0, A: 255} + cyan := color.RGBA{R: 0, G: 255, B: 255, A: 255} + magenta := color.RGBA{R: 255, G: 0, B: 255, A: 255} + + // Array of colors to cycle through + colors := []color.RGBA{red, yellow, green, cyan, magenta} + + // Draw each digit box + for i, box := range boxes { + // Cycle through colors + boxColor := colors[i%len(colors)] + + // Draw rectangle around digit + gocv.Rectangle(&viz, box, boxColor, 2) + + // Add digit number label + label := fmt.Sprintf("%d", i+1) + labelPos := image.Pt(box.Min.X+2, box.Min.Y-5) + if labelPos.Y < 10 { + // If label would be outside image, put it inside the box + labelPos.Y = box.Min.Y + 15 + } + gocv.PutText(&viz, label, labelPos, + gocv.FontHersheyPlain, 1.2, boxColor, 2) + + // Show dimensions + width := box.Max.X - box.Min.X + height := box.Max.Y - box.Min.Y + dimText := fmt.Sprintf("%dx%d", width, height) + gocv.PutText(&viz, dimText, image.Pt(box.Min.X, box.Max.Y+12), + gocv.FontHersheyPlain, 0.8, boxColor, 1) + } + + // Add summary text + summary := fmt.Sprintf("Found %d digits (Y offset: %d)", len(boxes), yOffset) + gocv.PutText(&viz, summary, image.Pt(5, viz.Rows()-10), + gocv.FontHersheyPlain, 1.0, green, 1) + + gocv.IMWrite(filename, viz) + fmt.Printf("DEBUG: Saved digit boxes visualization to %s\n", filename) +} + +// VisualizeDigitBoxesOnFull draws digit boxes on the full scaled image +func VisualizeDigitBoxesOnFull(fullImage gocv.Mat, boxes []image.Rectangle, digitBand Band, filename string) { + if !DEBUG { + return + } + + // Create visualization + viz := gocv.NewMat() + 
gocv.CvtColor(fullImage, &viz, gocv.ColorGrayToBGR) + defer viz.Close() + + // Colors + green := color.RGBA{R: 0, G: 255, B: 0, A: 255} + cyan := color.RGBA{R: 0, G: 255, B: 255, A: 255} + red := color.RGBA{R: 255, G: 0, B: 0, A: 255} + yellow := color.RGBA{R: 255, G: 255, B: 0, A: 255} + magenta := color.RGBA{R: 255, G: 0, B: 255, A: 255} + + colors := []color.RGBA{red, yellow, green, cyan, magenta} + + // Draw the digit band outline + bandRect := image.Rect(0, digitBand.minY, viz.Cols(), digitBand.maxY) + gocv.Rectangle(&viz, bandRect, cyan, 1) + gocv.PutText(&viz, "DIGIT BAND", image.Pt(10, digitBand.minY-5), + gocv.FontHersheyPlain, 1.0, cyan, 1) + + // Draw each digit box (adjust Y position to full image coordinates) + for i, box := range boxes { + // Adjust box coordinates to full image (add digitBand.minY offset) + adjustedBox := image.Rectangle{ + Min: image.Pt(box.Min.X, box.Min.Y+digitBand.minY), + Max: image.Pt(box.Max.X, box.Max.Y+digitBand.minY), + } + + // Cycle through colors + boxColor := colors[i%len(colors)] + + // Draw rectangle + gocv.Rectangle(&viz, adjustedBox, boxColor, 2) + + // Add digit number + label := fmt.Sprintf("D%d", i+1) + gocv.PutText(&viz, label, image.Pt(adjustedBox.Min.X+2, adjustedBox.Min.Y+15), + gocv.FontHersheyPlain, 1.0, boxColor, 2) + } + + // Add summary + summary := fmt.Sprintf("Total: %d digits detected", len(boxes)) + gocv.PutText(&viz, summary, image.Pt(10, 30), + gocv.FontHersheyPlain, 1.2, green, 2) + + gocv.IMWrite(filename, viz) + fmt.Printf("DEBUG: Saved full image with digit boxes to %s\n", filename) +} + +// ScaleByFactor scales an image by a specific scale factor +func ScaleByFactor(img gocv.Mat, scaleFactor float64) gocv.Mat { + // Calculate new dimensions + newWidth := int(float64(img.Cols()) * scaleFactor) + newHeight := int(float64(img.Rows()) * scaleFactor) + + // Resize image + scaled := gocv.NewMat() + gocv.Resize(img, &scaled, image.Point{X: newWidth, Y: newHeight}, 0, 0, gocv.InterpolationLinear) + + 
if DEBUG { + fmt.Printf("DEBUG: Scaled image from %dx%d to %dx%d (scale factor: %.3f)\n", + img.Cols(), img.Rows(), newWidth, newHeight, scaleFactor) + } + + return scaled +} + +// DisplayAreas holds the two display rectangles for SpO2 and HR +type DisplayAreas struct { + SpO2 image.Rectangle + HR image.Rectangle +} + +// mergeDigitBoxesWithCenter merges digit boxes into SpO2/HR displays using a specific centerX +func mergeDigitBoxesWithCenter(digitRegion gocv.Mat, digitBoxes []image.Rectangle, centerX int) DisplayAreas { + // Initialize bounding boxes + spo2MinX, spo2MaxX := digitRegion.Cols(), 0 + spo2MinY, spo2MaxY := digitRegion.Rows(), 0 + hrMinX, hrMaxX := digitRegion.Cols(), 0 + hrMinY, hrMaxY := digitRegion.Rows(), 0 + + // Track counts for logging + leftCount := 0 + rightCount := 0 + + // Split boxes by centerX and merge + for _, box := range digitBoxes { + // Calculate box center X to determine which half it's in + boxCenterX := (box.Min.X + box.Max.X) / 2 + + if boxCenterX < centerX { + // LEFT HALF - SpO2 + leftCount++ + if box.Min.X < spo2MinX { + spo2MinX = box.Min.X + } + if box.Max.X > spo2MaxX { + spo2MaxX = box.Max.X + } + if box.Min.Y < spo2MinY { + spo2MinY = box.Min.Y + } + if box.Max.Y > spo2MaxY { + spo2MaxY = box.Max.Y + } + } else { + // RIGHT HALF - HR + rightCount++ + if box.Min.X < hrMinX { + hrMinX = box.Min.X + } + if box.Max.X > hrMaxX { + hrMaxX = box.Max.X + } + if box.Min.Y < hrMinY { + hrMinY = box.Min.Y + } + if box.Max.Y > hrMaxY { + hrMaxY = box.Max.Y + } + } + } + + if DEBUG { + fmt.Printf("DEBUG: Split at center X=%d: %d boxes left (SpO2), %d boxes right (HR)\n", + centerX, leftCount, rightCount) + fmt.Printf("DEBUG: SpO2 merged area: X[%d-%d] Y[%d-%d], Size: %dx%d\n", + spo2MinX, spo2MaxX, spo2MinY, spo2MaxY, + spo2MaxX-spo2MinX, spo2MaxY-spo2MinY) + fmt.Printf("DEBUG: HR merged area: X[%d-%d] Y[%d-%d], Size: %dx%d\n", + hrMinX, hrMaxX, hrMinY, hrMaxY, + hrMaxX-hrMinX, hrMaxY-hrMinY) + } + + return DisplayAreas{ + SpO2: 
image.Rect(spo2MinX, spo2MinY, spo2MaxX, spo2MaxY), + HR: image.Rect(hrMinX, hrMinY, hrMaxX, hrMaxY), + } +} + +// VisualizeDetectedDisplays visualizes the detected SpO2 and HR display areas +func VisualizeDetectedDisplays(digitRegion gocv.Mat, displayAreas DisplayAreas, digitBoxes []image.Rectangle, centerX int, filename string) { + if !DEBUG { + return + } + + // Create visualization + viz := gocv.NewMat() + gocv.CvtColor(digitRegion, &viz, gocv.ColorGrayToBGR) + defer viz.Close() + + // Colors + red := color.RGBA{R: 255, G: 0, B: 0, A: 255} + yellow := color.RGBA{R: 255, G: 255, B: 0, A: 255} + cyan := color.RGBA{R: 0, G: 255, B: 255, A: 255} + green := color.RGBA{R: 0, G: 255, B: 0, A: 255} + + // Draw all individual digit boxes in green (thin lines) + for _, box := range digitBoxes { + gocv.Rectangle(&viz, box, green, 1) + } + + // Draw SpO2 area in red (thick) + gocv.Rectangle(&viz, displayAreas.SpO2, red, 3) + gocv.PutText(&viz, "SpO2", image.Pt(displayAreas.SpO2.Min.X+5, displayAreas.SpO2.Min.Y+20), + gocv.FontHersheyPlain, 1.5, red, 2) + + // Draw HR area in yellow (thick) + gocv.Rectangle(&viz, displayAreas.HR, yellow, 3) + gocv.PutText(&viz, "HR", image.Pt(displayAreas.HR.Min.X+5, displayAreas.HR.Min.Y+20), + gocv.FontHersheyPlain, 1.5, yellow, 2) + + // Draw center line from baseline + gocv.Line(&viz, + image.Pt(centerX, 0), + image.Pt(centerX, digitRegion.Rows()), + cyan, 2) + gocv.PutText(&viz, fmt.Sprintf("Center: %d", centerX), + image.Pt(centerX-40, 15), + gocv.FontHersheyPlain, 1.0, cyan, 1) + + gocv.IMWrite(filename, viz) + fmt.Printf("DEBUG: Saved detection visualization to %s\n", filename) +} + +// MergeDigitBoxesIntoDisplays merges individual digit boxes into two unified display areas +// This is the FINAL step of detection after finding digit boxes +// It splits at the center of the BOUNDING BOX (not image center) +// and merges all left boxes into SpO2, all right boxes into HR display area +func MergeDigitBoxesIntoDisplays(digitRegion 
gocv.Mat, digitBoxes []image.Rectangle) DisplayAreas { + // First, find the bounding box of ALL digit boxes + contentMinX := digitRegion.Cols() + contentMaxX := 0 + for _, box := range digitBoxes { + if box.Min.X < contentMinX { + contentMinX = box.Min.X + } + if box.Max.X > contentMaxX { + contentMaxX = box.Max.X + } + } + + // Calculate center of the CONTENT bounding box (not image center) + centerX := (contentMinX + contentMaxX) / 2 + + if DEBUG { + fmt.Printf("DEBUG: Content bounding box: X[%d-%d], Center X=%d\n", + contentMinX, contentMaxX, centerX) + } + + // Initialize bounding boxes + spo2MinX, spo2MaxX := digitRegion.Cols(), 0 + spo2MinY, spo2MaxY := digitRegion.Rows(), 0 + hrMinX, hrMaxX := digitRegion.Cols(), 0 + hrMinY, hrMaxY := digitRegion.Rows(), 0 + + // Track counts for logging + leftCount := 0 + rightCount := 0 + + // Split boxes by center X and merge + for _, box := range digitBoxes { + // Calculate box center X to determine which half it's in + boxCenterX := (box.Min.X + box.Max.X) / 2 + + if boxCenterX < centerX { + // LEFT HALF - SpO2 + leftCount++ + if box.Min.X < spo2MinX { + spo2MinX = box.Min.X + } + if box.Max.X > spo2MaxX { + spo2MaxX = box.Max.X + } + if box.Min.Y < spo2MinY { + spo2MinY = box.Min.Y + } + if box.Max.Y > spo2MaxY { + spo2MaxY = box.Max.Y + } + } else { + // RIGHT HALF - HR + rightCount++ + if box.Min.X < hrMinX { + hrMinX = box.Min.X + } + if box.Max.X > hrMaxX { + hrMaxX = box.Max.X + } + if box.Min.Y < hrMinY { + hrMinY = box.Min.Y + } + if box.Max.Y > hrMaxY { + hrMaxY = box.Max.Y + } + } + } + + if DEBUG { + fmt.Printf("DEBUG: Split at center X=%d: %d boxes left (SpO2), %d boxes right (HR)\n", + centerX, leftCount, rightCount) + fmt.Printf("DEBUG: SpO2 merged area: X[%d-%d] Y[%d-%d], Size: %dx%d\n", + spo2MinX, spo2MaxX, spo2MinY, spo2MaxY, + spo2MaxX-spo2MinX, spo2MaxY-spo2MinY) + fmt.Printf("DEBUG: HR merged area: X[%d-%d] Y[%d-%d], Size: %dx%d\n", + hrMinX, hrMaxX, hrMinY, hrMaxY, + hrMaxX-hrMinX, hrMaxY-hrMinY) + 
} + + return DisplayAreas{ + SpO2: image.Rect(spo2MinX, spo2MinY, spo2MaxX, spo2MaxY), + HR: image.Rect(hrMinX, hrMinY, hrMaxX, hrMaxY), + } +} + +// VisualizeMergedDisplays draws the merged display areas on the digit region +func VisualizeMergedDisplays(digitRegion gocv.Mat, displayAreas DisplayAreas, filename string) { + if !DEBUG { + return + } + + // Create visualization + viz := gocv.NewMat() + gocv.CvtColor(digitRegion, &viz, gocv.ColorGrayToBGR) + defer viz.Close() + + // Colors + red := color.RGBA{R: 255, G: 0, B: 0, A: 255} + yellow := color.RGBA{R: 255, G: 255, B: 0, A: 255} + cyan := color.RGBA{R: 0, G: 255, B: 255, A: 255} + + // Draw SpO2 area in red + gocv.Rectangle(&viz, displayAreas.SpO2, red, 3) + gocv.PutText(&viz, "SpO2", image.Pt(displayAreas.SpO2.Min.X+5, displayAreas.SpO2.Min.Y+20), + gocv.FontHersheyPlain, 1.5, red, 2) + + // Draw HR area in yellow + gocv.Rectangle(&viz, displayAreas.HR, yellow, 3) + gocv.PutText(&viz, "HR", image.Pt(displayAreas.HR.Min.X+5, displayAreas.HR.Min.Y+20), + gocv.FontHersheyPlain, 1.5, yellow, 2) + + // Draw center line at midpoint between the two display areas + centerX := (displayAreas.SpO2.Max.X + displayAreas.HR.Min.X) / 2 + gocv.Line(&viz, + image.Pt(centerX, 0), + image.Pt(centerX, digitRegion.Rows()), + cyan, 1) + + gocv.IMWrite(filename, viz) + fmt.Printf("DEBUG: Saved merged displays visualization to %s\n", filename) +} diff --git a/backups/backup_20251127/frame_source.go b/backups/backup_20251127/frame_source.go new file mode 100644 index 0000000..448020e --- /dev/null +++ b/backups/backup_20251127/frame_source.go @@ -0,0 +1,237 @@ +package main + +import ( + "fmt" + "image" + "sync" + "time" + + "gocv.io/x/gocv" +) + +// FrameSource provides frames to process +type FrameSource interface { + // Next returns the next frame and whether to continue processing + // Returns (frame, shouldContinue, error) + Next() (gocv.Mat, bool, error) + + // NextImmediate returns the very next frame (ignoring skip count) + 
// Used for immediate retry after low confidence/corruption + NextImmediate() (gocv.Mat, bool, error) + + // Close cleans up resources + Close() error + + // Name returns a description of the source + Name() string + + // IsActive returns false if source has been closed + IsActive() bool +} + +// RTSPSource reads from an RTSP stream +type RTSPSource struct { + url string + stream *gocv.VideoCapture + frameNum int + minFrameInterval time.Duration // Minimum time between processed frames + lastProcessedTime time.Time // Last time we processed a frame + lastAcquiredTime time.Time // Last time we acquired a frame (for logging) + closed bool + closeOnce sync.Once +} + +// NewRTSPSource creates a new RTSP frame source +// Processes frames at 4 fps (250ms minimum between frames) +func NewRTSPSource(url string) (*RTSPSource, error) { + stream, err := gocv.VideoCaptureFile(url) + if err != nil { + return nil, fmt.Errorf("failed to connect to RTSP stream: %w", err) + } + + logMessage(Console, Info, "šŸ“Š Processing frames at 4 fps (250ms minimum interval)") + + return &RTSPSource{ + url: url, + stream: stream, + minFrameInterval: 250 * time.Millisecond, + lastProcessedTime: time.Time{}, // Zero time = process first frame immediately + }, nil +} + +// readFrame reads and validates the next frame from the stream +func (s *RTSPSource) readFrame() (gocv.Mat, bool, error) { + // Check if source was closed + if s.closed { + return gocv.NewMat(), false, nil + } + + // Check if stream is valid + if s.stream == nil { + return gocv.NewMat(), false, fmt.Errorf("stream is null") + } + + frame := gocv.NewMat() + if ok := s.stream.Read(&frame); !ok { + frame.Close() + + // Check again before reconnecting + if s.closed { + return gocv.NewMat(), false, nil + } + + // Try to reconnect + s.stream.Close() + time.Sleep(5 * time.Second) + + // Check again after sleep + if s.closed { + return gocv.NewMat(), false, nil + } + + newStream, err := gocv.VideoCaptureFile(s.url) + if err != nil { + 
s.stream = nil // Set to nil to prevent further read attempts + s.closed = true // Mark as closed + return gocv.NewMat(), false, fmt.Errorf("reconnect failed: %w", err) + } + s.stream = newStream + return gocv.NewMat(), true, nil // Signal to retry + } + + s.frameNum++ + + // Check frame validity + if frame.Empty() || frame.Cols() < 640 || frame.Rows() < 480 { + frame.Close() + return gocv.NewMat(), true, nil // Signal to retry + } + + return frame, true, nil +} + +func (s *RTSPSource) Next() (gocv.Mat, bool, error) { + for { + frame, shouldContinue, err := s.readFrame() + if err != nil || !shouldContinue { + return frame, shouldContinue, err + } + + if frame.Empty() { + // readFrame returned empty (reconnecting or invalid), retry + continue + } + + // Check if enough time has passed since last processed frame was ACQUIRED + now := time.Now() + if !s.lastProcessedTime.IsZero() { + elapsed := now.Sub(s.lastProcessedTime) + if elapsed < s.minFrameInterval { + // Not enough time passed, skip this frame + frame.Close() + continue + } + } + + // Enough time passed, mark acquisition time NOW (before processing) + s.lastProcessedTime = now + + // Log frame acquisition interval + if !s.lastAcquiredTime.IsZero() { + interval := now.Sub(s.lastAcquiredTime).Milliseconds() + logMessage(LogFile, Debug, " [TIMING] Frame acquired: +%dms since last (target: 250ms)", interval) + } + s.lastAcquiredTime = now + + return frame, true, nil + } +} + +func (s *RTSPSource) NextImmediate() (gocv.Mat, bool, error) { + // Get the very next frame (no skip count) + for { + frame, shouldContinue, err := s.readFrame() + if err != nil || !shouldContinue { + return frame, shouldContinue, err + } + + if frame.Empty() { + // readFrame returned empty (reconnecting or invalid), retry + continue + } + + return frame, true, nil + } +} + +func (s *RTSPSource) Close() error { + s.closeOnce.Do(func() { + s.closed = true + // DON'T close the stream here - it causes segfault if read is in progress + // The 
stream will be cleaned up when main() exits + }) + return nil +} + +func (s *RTSPSource) IsActive() bool { + return !s.closed +} + +func (s *RTSPSource) Name() string { + return fmt.Sprintf("RTSP stream: %s", s.url) +} + +// FileSource reads a single frame from a file +type FileSource struct { + path string + done bool +} + +// NewFileSource creates a new file frame source +func NewFileSource(path string) *FileSource { + return &FileSource{ + path: path, + done: false, + } +} + +func (s *FileSource) Next() (gocv.Mat, bool, error) { + if s.done { + return gocv.NewMat(), false, nil + } + + frame := gocv.IMRead(s.path, gocv.IMReadColor) + if frame.Empty() { + return gocv.NewMat(), false, fmt.Errorf("failed to load frame: %s", s.path) + } + + s.done = true + return frame, false, nil // Return frame, but signal to stop after this +} + +func (s *FileSource) NextImmediate() (gocv.Mat, bool, error) { + // For file source, NextImmediate is same as Next + return s.Next() +} + +func (s *FileSource) Close() error { + return nil +} + +func (s *FileSource) IsActive() bool { + return !s.done +} + +func (s *FileSource) Name() string { + return fmt.Sprintf("File: %s", s.path) +} + +// preprocessFrame crops timestamp and rotates +func preprocessFrame(frame gocv.Mat) gocv.Mat { + // Crop timestamp (top 68 pixels) + noTs := frame.Region(image.Rect(0, 68, frame.Cols(), frame.Rows())) + rotated := gocv.NewMat() + gocv.Rotate(noTs, &rotated, gocv.Rotate90Clockwise) + noTs.Close() + return rotated +} diff --git a/backups/backup_20251127/go.mod b/backups/backup_20251127/go.mod new file mode 100644 index 0000000..0c8e373 --- /dev/null +++ b/backups/backup_20251127/go.mod @@ -0,0 +1,9 @@ +module pulse-monitor + +go 1.24.4 + +require ( + github.com/otiai10/gosseract/v2 v2.4.1 // indirect + gocv.io/x/gocv v0.42.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/backups/backup_20251127/go.sum b/backups/backup_20251127/go.sum new file mode 100644 index 0000000..3a01201 --- 
/dev/null +++ b/backups/backup_20251127/go.sum @@ -0,0 +1,7 @@ +github.com/otiai10/gosseract/v2 v2.4.1 h1:G8AyBpXEeSlcq8TI85LH/pM5SXk8Djy2GEXisgyblRw= +github.com/otiai10/gosseract/v2 v2.4.1/go.mod h1:1gNWP4Hgr2o7yqWfs6r5bZxAatjOIdqWxJLWsTsembk= +gocv.io/x/gocv v0.42.0 h1:AAsrFJH2aIsQHukkCovWqj0MCGZleQpVyf5gNVRXjQI= +gocv.io/x/gocv v0.42.0/go.mod h1:zYdWMj29WAEznM3Y8NsU3A0TRq/wR/cy75jeUypThqU= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/backups/backup_20251127/helpers.go b/backups/backup_20251127/helpers.go new file mode 100644 index 0000000..4a0920e --- /dev/null +++ b/backups/backup_20251127/helpers.go @@ -0,0 +1,117 @@ +package main + +import ( + "fmt" + "io" + "time" +) + +// LogTarget specifies where to write the log message +type LogTarget int +const ( + Console LogTarget = iota + LogFile + Both +) + +// LogLevel specifies the severity/type of log message +type LogLevel int +const ( + Debug LogLevel = iota + Info + Warning + Error +) + +// Global logger - set once during initialization +var globalLogger io.Writer = nil + +// logMessage writes a formatted log message to the specified target(s) +// Debug messages are automatically filtered out when DEBUG_MODE is false +// +// Usage examples: +// logMessage(Console, Info, "SpO2=%d%%, HR=%d bpm", spo2, hr) +// logMessage(Both, Debug, "Before preprocess: %dx%d", cols, rows) +// logMessage(Both, Error, "Layout detection failed: %v", err) +// logMessage(Both, Warning, "Low confidence (#%d)", count) +func logMessage(target LogTarget, level LogLevel, format string, args ...interface{}) { + // Filter debug messages when not in debug mode + if level == Debug && !DEBUG_MODE { + return + } + + // Format the message + message := fmt.Sprintf(format, args...) 
+ + // Single-letter prefix for fixed-width alignment + var levelChar string + switch level { + case Debug: + levelChar = "D" + case Info: + levelChar = "I" + case Warning: + levelChar = "W" + case Error: + levelChar = "E" + } + + // Format with timestamp + timestamp := time.Now().Format("15:04:05.000") + formattedMessage := fmt.Sprintf("[%s] %s %s\n", timestamp, levelChar, message) + + // Write to target(s) + switch target { + case Console: + fmt.Print(formattedMessage) + case LogFile: + if globalLogger != nil { + fmt.Fprint(globalLogger, formattedMessage) + } + case Both: + fmt.Print(formattedMessage) + if globalLogger != nil { + fmt.Fprint(globalLogger, formattedMessage) + } + } +} + +// logf is DEPRECATED - use logMessage() instead +// Kept temporarily for reference, but no longer used in the codebase +/* +func logf(logger io.Writer, format string, args ...interface{}) { + if logger != nil { + fmt.Fprintf(logger, format, args...) + } +} +*/ + +// printTimingTable prints timing data in horizontal table format +// Prints header every 20 frames for readability +// Note: Acquire is separate (camera waiting time) +// Note: Total is wall-clock processing time (may not equal sum due to logging/overhead) +// Note: HASS is added in main loop after function returns +func printTimingTable(timing TimingData, showHeader bool) { + if !TIMING_MODE { + return + } + + if showHeader { + fmt.Println("") + fmt.Println("Frame | Acquire | Thresh | Prep | Scale | OCR_SpO2 | OCR_HR | Valid | FileIO | HASS | Total") + fmt.Println("------|---------|--------|------|-------|----------|--------|-------|--------|------|-------") + } + + fmt.Printf("#%-4d | %5dms | %5dms | %3dms | %4dms | %7dms | %5dms | %4dms | %5dms | %3dms | %4dms\n", + timing.FrameNum, + timing.Acquire, + timing.Threshold, + timing.Preprocess, + timing.Scale, + timing.OCR_SpO2, + timing.OCR_HR, + timing.Validation, + timing.FileIO, + timing.HASS, + timing.Total) +} diff --git a/backups/backup_20251127/homeassistant.go 
b/backups/backup_20251127/homeassistant.go new file mode 100644 index 0000000..361fb29 --- /dev/null +++ b/backups/backup_20251127/homeassistant.go @@ -0,0 +1,43 @@ +package main + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "net/http" + "time" +) + +func postToHomeAssistant(config *Config, entityID string, value int, unit string, friendlyName string) error { + url := fmt.Sprintf("%s/api/states/%s", config.HomeAssistant.URL, entityID) + payload := map[string]interface{}{ + "state": fmt.Sprintf("%d", value), + "attributes": map[string]interface{}{ + "unit_of_measurement": unit, + "friendly_name": friendlyName, + "device_class": "measurement", + }, + } + jsonData, err := json.Marshal(payload) + if err != nil { + return fmt.Errorf("marshal error: %w", err) + } + req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData)) + if err != nil { + return fmt.Errorf("request error: %w", err) + } + req.Header.Set("Authorization", "Bearer "+config.HomeAssistant.Token) + req.Header.Set("Content-Type", "application/json") + client := &http.Client{Timeout: 5 * time.Second} + resp, err := client.Do(req) + if err != nil { + return fmt.Errorf("HTTP error: %w", err) + } + defer resp.Body.Close() + if resp.StatusCode != 200 && resp.StatusCode != 201 { + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(body)) + } + return nil +} diff --git a/backups/backup_20251127/html_report.go b/backups/backup_20251127/html_report.go new file mode 100644 index 0000000..bdb0055 --- /dev/null +++ b/backups/backup_20251127/html_report.go @@ -0,0 +1,281 @@ +package main + +import ( + "fmt" + "os" +) + +type ReviewEntry struct { + FrameNum int + Timestamp string + SpO2Value int + SpO2LeftDigit int + SpO2LeftConf float64 + SpO2RightDigit int + SpO2RightConf float64 + HRValue int + HRLeftDigit int + HRLeftConf float64 + HRRightDigit int + HRRightConf float64 + Failed bool + FailureReason string + Unstable bool + UnstableReason string +} + +// 
initReviewHTML creates the HTML file with header and opens the table +func initReviewHTML() error { + html := ` + + + + Pulse-Ox Recognition Review + + + + + + +

Pulse-Ox Recognition Review (Live)

+

Tip: Press Ctrl+F (or Cmd+F on Mac) and search for "CHECK" to find frames with low confidence (<85%)

+

Refresh page to see latest frames...

+ + + + + + + + +` + return os.WriteFile("review/review.html", []byte(html), 0644) +} + +// appendReviewEntry appends a single entry to the HTML file +func appendReviewEntry(e ReviewEntry) error { + confClass := func(conf float64) string { + if conf >= 85 { + return "conf-high" + } else if conf >= 70 { + return "conf-med" + } + return "conf-low" + } + + needsReview := e.SpO2LeftConf < 85 || e.SpO2RightConf < 85 || e.HRLeftConf < 85 || e.HRRightConf < 85 + reviewMarker := "" + if needsReview { + reviewMarker = " āš ļøCHECK" + } + + var rowHTML string + + if e.Failed { + // Failed recognition entry + rowHTML = fmt.Sprintf(` + + + + + + +`, e.FrameNum, reviewMarker, e.Timestamp, e.FrameNum, e.FrameNum, e.FailureReason, + e.SpO2Value, + e.FrameNum, e.FrameNum, + e.FrameNum, e.FrameNum, + e.FrameNum, e.FrameNum, e.SpO2LeftDigit, confClass(e.SpO2LeftConf), e.SpO2LeftConf, + e.FrameNum, e.FrameNum, e.SpO2RightDigit, confClass(e.SpO2RightConf), e.SpO2RightConf, + e.HRValue, + e.FrameNum, e.FrameNum, + e.FrameNum, e.FrameNum, + e.FrameNum, e.FrameNum, e.HRLeftDigit, confClass(e.HRLeftConf), e.HRLeftConf, + e.FrameNum, e.FrameNum, e.HRRightDigit, confClass(e.HRRightConf), e.HRRightConf) + } else { + // Successful recognition entry + unstableMarker := "" + if e.Unstable { + unstableMarker = fmt.Sprintf("
āš ļø %s", e.UnstableReason) + } + rowHTML = fmt.Sprintf(` + + + + + + +`, e.FrameNum, reviewMarker, unstableMarker, e.Timestamp, + e.FrameNum, e.FrameNum, + e.SpO2Value, + e.FrameNum, e.FrameNum, + e.FrameNum, e.FrameNum, + e.FrameNum, e.FrameNum, e.SpO2LeftDigit, confClass(e.SpO2LeftConf), e.SpO2LeftConf, + e.FrameNum, e.FrameNum, e.SpO2RightDigit, confClass(e.SpO2RightConf), e.SpO2RightConf, + e.HRValue, + e.FrameNum, e.FrameNum, + e.FrameNum, e.FrameNum, + e.FrameNum, e.FrameNum, e.HRLeftDigit, confClass(e.HRLeftConf), e.HRLeftConf, + e.FrameNum, e.FrameNum, e.HRRightDigit, confClass(e.HRRightConf), e.HRRightConf) + } + + // Open file in append mode + f, err := os.OpenFile("review/review.html", os.O_APPEND|os.O_WRONLY, 0644) + if err != nil { + return err + } + defer f.Close() + + _, err = f.WriteString(rowHTML) + return err +} + +// closeReviewHTML writes the closing tags +func closeReviewHTML() error { + footer := `
FrameTimeDetected BoxesSpO2 RecognitionHR Recognition
%d%s%s + + + āŒ FAILED: %s
+ SpO2: %d
+
+ +
+
+ + D1 +
+
+ + %d + (%.0f%%) +
+
+ + %d + (%.0f%%) +
+
+ HR: %d
+
+ +
+
+ + D1 +
+
+ + %d + (%.0f%%) +
+
+ + %d + (%.0f%%) +
+
%d%s%s%s + + + %d
+
+ +
+
+ + D1 +
+
+ + %d + (%.0f%%) +
+
+ + %d + (%.0f%%) +
+
+ %d
+
+ +
+
+ + D1 +
+
+ + %d + (%.0f%%) +
+
+ + %d + (%.0f%%) +
+
+

+ Color coding: + Green ≄85%, + Orange 70-84%, + Red <70% +

+

To add digits to training set, copy files like: cp review/f5_spo2_digit2.png training_digits/9_2.png

+ +` + + f, err := os.OpenFile("review/review.html", os.O_APPEND|os.O_WRONLY, 0644) + if err != nil { + return err + } + defer f.Close() + + _, err = f.WriteString(footer) + return err +} + +// writeReviewHTML - kept for backward compatibility (final shutdown) +func writeReviewHTML(entries []ReviewEntry) error { + // Just close the HTML properly - entries already appended + return closeReviewHTML() +} diff --git a/backups/backup_20251127/layout_detection.go b/backups/backup_20251127/layout_detection.go new file mode 100644 index 0000000..a851f1b --- /dev/null +++ b/backups/backup_20251127/layout_detection.go @@ -0,0 +1,52 @@ +package main + +import ( + "fmt" + "image" + "image/color" + + "gocv.io/x/gocv" +) + +// ScreenLayout holds the SpO2 and HR display areas +// Used by processor.go for OCR extraction +type ScreenLayout struct { + SpO2Area image.Rectangle + HRArea image.Rectangle +} + +// saveLayoutVisualization draws the detected layout on an image for debugging +func saveLayoutVisualization(img gocv.Mat, layout *ScreenLayout, filename string) { + visualization := gocv.NewMat() + defer visualization.Close() + + // Convert to color if grayscale + if img.Channels() == 1 { + gocv.CvtColor(img, &visualization, gocv.ColorGrayToBGR) + } else { + img.CopyTo(&visualization) + } + + // Draw SpO2 box in red + red := color.RGBA{255, 0, 0, 255} + gocv.Rectangle(&visualization, layout.SpO2Area, red, 3) + gocv.PutText(&visualization, "SpO2", image.Pt(layout.SpO2Area.Min.X, layout.SpO2Area.Min.Y-10), + gocv.FontHersheyDuplex, 1.2, red, 2) + + // Draw HR box in cyan + cyan := color.RGBA{0, 255, 255, 255} + gocv.Rectangle(&visualization, layout.HRArea, cyan, 3) + gocv.PutText(&visualization, "HR", image.Pt(layout.HRArea.Min.X, layout.HRArea.Min.Y-10), + gocv.FontHersheyDuplex, 1.2, cyan, 2) + + // Add dimensions + spo2Text := fmt.Sprintf("%dx%d", layout.SpO2Area.Dx(), layout.SpO2Area.Dy()) + gocv.PutText(&visualization, spo2Text, image.Pt(layout.SpO2Area.Min.X, 
layout.SpO2Area.Max.Y+20), + gocv.FontHersheyPlain, 1.0, red, 1) + + hrText := fmt.Sprintf("%dx%d", layout.HRArea.Dx(), layout.HRArea.Dy()) + gocv.PutText(&visualization, hrText, image.Pt(layout.HRArea.Min.X, layout.HRArea.Max.Y+20), + gocv.FontHersheyPlain, 1.0, cyan, 1) + + gocv.IMWrite(filename, visualization) +} diff --git a/backups/backup_20251127/ocr.go b/backups/backup_20251127/ocr.go new file mode 100644 index 0000000..d489316 --- /dev/null +++ b/backups/backup_20251127/ocr.go @@ -0,0 +1,569 @@ +package main + +import ( + "fmt" + "image" + "image/color" + "io" + "os" + "strings" + "time" + + "gocv.io/x/gocv" +) + +func loadTemplates() (map[int][]gocv.Mat, error) { + templates := make(map[int][]gocv.Mat) + files, err := os.ReadDir("training_digits") + if err != nil { + logMessage(Console, Warning, "āš ļø WARNING: training_digits directory not found or cannot be read: %v", err) + logMessage(Console, Warning, "āš ļø Starting with empty template set. Recognition will not work until templates are added.") + logMessage(Console, Info, "") + return templates, nil + } + templateCount := 0 + for _, file := range files { + if !strings.HasSuffix(file.Name(), ".png") { + continue + } + name := strings.TrimSuffix(file.Name(), ".png") + parts := strings.Split(name, "_") + var digit int + _, err := fmt.Sscanf(parts[0], "%d", &digit) + if err != nil || digit < 0 || digit > 9 { + continue + } + filename := fmt.Sprintf("training_digits/%s", file.Name()) + template := gocv.IMRead(filename, gocv.IMReadGrayScale) + if template.Empty() { + logMessage(Console, Warning, "Warning: Failed to load %s", filename) + continue + } + templates[digit] = append(templates[digit], template) + templateCount++ + } + + // Load invalid digit markers (use index -1 for invalid patterns) + invalidFiles, err := os.ReadDir("training_digits/invalid") + if err == nil { + invalidCount := 0 + for _, file := range invalidFiles { + if !strings.HasSuffix(file.Name(), ".png") { + continue + } + filename := 
fmt.Sprintf("training_digits/invalid/%s", file.Name()) + template := gocv.IMRead(filename, gocv.IMReadGrayScale) + if template.Empty() { + logMessage(Console, Warning, "Warning: Failed to load invalid template %s", filename) + continue + } + templates[-1] = append(templates[-1], template) + invalidCount++ + } + if invalidCount > 0 { + logMessage(Console, Info, "āœ“ Loaded %d invalid digit markers", invalidCount) + } + } + + for digit := 0; digit <= 9; digit++ { + if len(templates[digit]) == 0 { + logMessage(Console, Warning, "āš ļø WARNING: No templates found for digit %d", digit) + } + } + logMessage(Console, Info, "āœ“ Loaded %d digit templates (0-9)", templateCount) + return templates, nil +} + +func matchDigit(digitImg gocv.Mat, templates map[int][]gocv.Mat) (int, float64) { + bestDigit := -1 + bestScore := 0.0 + bestInvalidScore := 0.0 + + // Check invalid patterns first (stored at index -1) + invalidTemplates := templates[-1] + for _, template := range invalidTemplates { + compareImg := digitImg + if digitImg.Rows() != template.Rows() || digitImg.Cols() != template.Cols() { + resized := gocv.NewMat() + gocv.Resize(digitImg, &resized, image.Pt(template.Cols(), template.Rows()), 0, 0, gocv.InterpolationLinear) + defer resized.Close() + compareImg = resized + } + diff := gocv.NewMat() + gocv.AbsDiff(compareImg, template, &diff) + totalPixels := compareImg.Rows() * compareImg.Cols() + diffPixels := gocv.CountNonZero(diff) + samePixels := totalPixels - diffPixels + score := (float64(samePixels) / float64(totalPixels)) * 100.0 + diff.Close() + if score > bestInvalidScore { + bestInvalidScore = score + } + } + + // Check regular digits (0-9) + for digit := 0; digit <= 9; digit++ { + digitTemplates := templates[digit] + if len(digitTemplates) == 0 { + // No templates for this digit, skip + continue + } + for _, template := range digitTemplates { + compareImg := digitImg + if digitImg.Rows() != template.Rows() || digitImg.Cols() != template.Cols() { + resized := 
gocv.NewMat() + gocv.Resize(digitImg, &resized, image.Pt(template.Cols(), template.Rows()), 0, 0, gocv.InterpolationLinear) + defer resized.Close() + compareImg = resized + } + diff := gocv.NewMat() + gocv.AbsDiff(compareImg, template, &diff) + totalPixels := compareImg.Rows() * compareImg.Cols() + diffPixels := gocv.CountNonZero(diff) + samePixels := totalPixels - diffPixels + score := (float64(samePixels) / float64(totalPixels)) * 100.0 + diff.Close() + if score > bestScore { + bestScore = score + bestDigit = digit + } + } + } + + // If invalid pattern matches better than any digit (and score > 70%), return -1 + if bestInvalidScore > bestScore && bestInvalidScore > 70 { + return -1, bestInvalidScore + } + + return bestDigit, bestScore +} + +// Helper function: check if there's a '1' digit by cutting at the expected width and matching templates +// ONLY checks against '1' templates to prevent false matches with '4', '7', etc. +func hasOneAt(thresh gocv.Mat, x int, templates map[int][]gocv.Mat, logger io.Writer) bool { + h := thresh.Rows() + w := thresh.Cols() + + // Calculate the region where a '1' would be if it exists + // '1' is 72px wide, so extract from (x - 72) to x + leftEdge := x - DIGIT_ONE_WIDTH + if leftEdge < 0 { + leftEdge = 0 + } + + if x > w { + x = w + } + + logMessage(Both, Debug, " hasOneAt(x=%d): extracting region [%d..%d] (width=%d) from display width=%d", + x, leftEdge, x, x-leftEdge, w) + + // Extract the potential '1' region + region := thresh.Region(image.Rect(leftEdge, 0, x, h)) + digitImg := region.Clone() + region.Close() + + // Match ONLY against '1' templates (don't check other digits) + oneTemplates := templates[1] + if len(oneTemplates) == 0 { + logMessage(LogFile, Warning, " hasOneAt(x=%d): No '1' templates loaded -> NO", x) + digitImg.Close() + return false + } + + bestScore := 0.0 + for _, template := range oneTemplates { + compareImg := digitImg + if digitImg.Rows() != template.Rows() || digitImg.Cols() != template.Cols() { + 
resized := gocv.NewMat() + gocv.Resize(digitImg, &resized, image.Pt(template.Cols(), template.Rows()), 0, 0, gocv.InterpolationLinear) + defer resized.Close() + compareImg = resized + } + diff := gocv.NewMat() + gocv.AbsDiff(compareImg, template, &diff) + totalPixels := compareImg.Rows() * compareImg.Cols() + diffPixels := gocv.CountNonZero(diff) + samePixels := totalPixels - diffPixels + score := (float64(samePixels) / float64(totalPixels)) * 100.0 + diff.Close() + if score > bestScore { + bestScore = score + } + } + digitImg.Close() + + // If it matches '1' with >85% confidence, we have a '1' + if bestScore > 85.0 { + logMessage(Both, Debug, " hasOneAt(x=%d): matched '1' with %.1f%% confidence -> YES", x, bestScore) + return true + } + + logMessage(Both, Debug, " hasOneAt(x=%d): best '1' match %.1f%% confidence -> NO", x, bestScore) + return false +} + +// validateEight checks if a digit image has the two characteristic holes of an '8' +// by checking for black pixels along horizontal lines where holes should be +func validateEight(digitImg gocv.Mat, logger io.Writer) bool { + h := digitImg.Rows() + w := digitImg.Cols() + + // Upper hole: 30% of height + // Lower hole: 70% of height + // Horizontal range: 40%-50% of width + topHoleY := (h * 30) / 100 + bottomHoleY := (h * 70) / 100 + xStart := (w * 40) / 100 + xEnd := (w * 50) / 100 + + // Check for ANY black pixel in the horizontal range + // In thresholded image: black (0) = hole, white (255) = digit + topHoleFound := false + for x := xStart; x <= xEnd; x++ { + pixel := digitImg.GetUCharAt(topHoleY, x) + if pixel < 128 { + topHoleFound = true + break + } + } + + bottomHoleFound := false + for x := xStart; x <= xEnd; x++ { + pixel := digitImg.GetUCharAt(bottomHoleY, x) + if pixel < 128 { + bottomHoleFound = true + break + } + } + + logMessage(LogFile, Debug, " validateEight: top hole @ y=%d (30%%) x=%d-%d (%v), bottom hole @ y=%d (70%%) x=%d-%d (%v)", + topHoleY, xStart, xEnd, topHoleFound, + bottomHoleY, 
xStart, xEnd, bottomHoleFound) + + return topHoleFound && bottomHoleFound +} + +// validateZero checks if a digit image has the characteristic hole of a '0' +// by checking for black pixels along a horizontal line at the center +func validateZero(digitImg gocv.Mat, logger io.Writer) bool { + h := digitImg.Rows() + w := digitImg.Cols() + + // Center hole: 50% of height + // Horizontal range: 30%-70% of width (wider range than '8' since '0' hole is larger) + centerY := h / 2 + xStart := (w * 30) / 100 + xEnd := (w * 70) / 100 + + // Check for ANY black pixel in the horizontal range + // In thresholded image: black (0) = hole, white (255) = digit + holeFound := false + for x := xStart; x <= xEnd; x++ { + pixel := digitImg.GetUCharAt(centerY, x) + if pixel < 128 { + holeFound = true + break + } + } + + logMessage(LogFile, Debug, " validateZero: center hole @ y=%d (50%%) x=%d-%d (%v)", + centerY, xStart, xEnd, holeFound) + + return holeFound +} + +// New simplified version that works with fixed-width rectangles +func recognizeDisplayArea(rotated gocv.Mat, area image.Rectangle, templates map[int][]gocv.Mat, displayName string, frameCount int, logger io.Writer) (int, int, float64, int, float64) { + startTime := time.Now() + cleanName := strings.TrimSpace(strings.ToLower(displayName)) + + // Extract the display area (ALREADY THRESHOLDED BINARY IMAGE) + region := rotated.Region(area) + thresh := region.Clone() + region.Close() + + // DEBUG: Save extracted region - already thresholded + if DEBUG_MODE { + step1Filename := fmt.Sprintf("review/f%d_%s_step1_extracted.png", frameCount, cleanName) + gocv.IMWrite(step1Filename, thresh) + } + + w := thresh.Cols() + h := thresh.Rows() + + // Log display dimensions + logMessage(LogFile, Debug, " %s display: w=%d, h=%d", displayName, w, h) + + // Sequential check for '1' patterns to determine digit widths + // hasOneAt checks if column X is white and column X+10 is black (detects RIGHT EDGE where '1' ends) + + // Digit detection (right 
to left: digit3, digit2, digit1) + // D stores cut positions, digitIsOne stores detection results + D := make([]int, 4) // indices 1,2,3 (0 unused) + digitIsOne := make([]bool, 4) // indices 1,2,3 (0 unused) + + totalWidth := 0 + + logMessage(Both, Debug, " [%s] Display width=%d, starting digit detection...", displayName, w) + + detectionStart := time.Now() + for i := 3; i >= 1; i-- { + var checkPos int + if i == 3 { + checkPos = w // Check at right edge, not w-8 + } else { + checkPos = w - totalWidth - 5 + } + + logMessage(Both, Debug, " [%s] Digit %d: checkPos=%d (w=%d, totalWidth=%d)", displayName, i, checkPos, w, totalWidth) + digitIsOne[i] = hasOneAt(thresh, checkPos, templates, logger) + + // Add width to accumulator + var widthUsed int + if digitIsOne[i] { + widthUsed = DIGIT_ONE_WIDTH + totalWidth += DIGIT_ONE_WIDTH + logMessage(Both, Debug, " [%s] Digit %d: IS ONE -> using width=%d (totalWidth now %d)", displayName, i, widthUsed, totalWidth) + } else { + widthUsed = DIGIT_NON_ONE_WIDTH + totalWidth += DIGIT_NON_ONE_WIDTH + logMessage(Both, Debug, " [%s] Digit %d: NOT ONE -> using width=%d (totalWidth now %d)", displayName, i, widthUsed, totalWidth) + } + + D[i] = w - totalWidth + logMessage(Both, Debug, " [%s] Digit %d: CUT position D[%d]=%d", displayName, i, i, D[i]) + } + logMessage(LogFile, Debug, " [TIMING][%s] Digit detection (hasOneAt x3): %dms", displayName, time.Since(detectionStart).Milliseconds()) + + logMessage(Both, Debug, " [%s] Final cut positions: D[1]=%d, D[2]=%d, D[3]=%d", displayName, D[1], D[2], D[3]) + + // Create visualization image showing check positions AND cut positions + visImg := gocv.NewMat() + gocv.CvtColor(thresh, &visImg, gocv.ColorGrayToBGR) + + // Draw vertical lines at check positions with labels + greenColor := color.RGBA{0, 255, 0, 255} // Green for '1' detected + redColor := color.RGBA{0, 0, 255, 255} // Red for no '1' + yellowColor := color.RGBA{0, 255, 255, 255} // Yellow for CUT lines + + // Digit 3 check line + 
digit3Color := redColor + if digitIsOne[3] { + digit3Color = greenColor + } + gocv.Line(&visImg, image.Pt(w-8, 0), image.Pt(w-8, h), digit3Color, 2) + gocv.PutText(&visImg, "D3", image.Pt(w-8+5, 20), gocv.FontHersheyDuplex, 0.5, digit3Color, 1) + + // Digit 2 check line (calculate position from accumulated width) + var digit2Width int + if digitIsOne[3] { + digit2Width = DIGIT_ONE_WIDTH + } else { + digit2Width = DIGIT_NON_ONE_WIDTH + } + digit2CheckPos := w - digit2Width - 5 + digit2Color := redColor + if digitIsOne[2] { + digit2Color = greenColor + } + gocv.Line(&visImg, image.Pt(digit2CheckPos, 0), image.Pt(digit2CheckPos, h), digit2Color, 2) + gocv.PutText(&visImg, "D2", image.Pt(digit2CheckPos+5, 40), gocv.FontHersheyDuplex, 0.5, digit2Color, 1) + + // Digit 1 check line (calculate position from accumulated width) + var digit3Width int + if digitIsOne[3] { + digit3Width = DIGIT_ONE_WIDTH + } else { + digit3Width = DIGIT_NON_ONE_WIDTH + } + var digit2WidthForD1 int + if digitIsOne[2] { + digit2WidthForD1 = DIGIT_ONE_WIDTH + } else { + digit2WidthForD1 = DIGIT_NON_ONE_WIDTH + } + digit1CheckPos := w - digit3Width - digit2WidthForD1 - 5 + digit1Color := redColor + if digitIsOne[1] { + digit1Color = greenColor + } + gocv.Line(&visImg, image.Pt(digit1CheckPos, 0), image.Pt(digit1CheckPos, h), digit1Color, 2) + gocv.PutText(&visImg, "D1", image.Pt(digit1CheckPos+5, 60), gocv.FontHersheyDuplex, 0.5, digit1Color, 1) + + // Draw CUT lines in yellow showing where we actually split the digits + gocv.Line(&visImg, image.Pt(D[2], 0), image.Pt(D[2], h), yellowColor, 3) + gocv.PutText(&visImg, "CUT1", image.Pt(D[2]+5, h-10), gocv.FontHersheyDuplex, 0.6, yellowColor, 2) + + gocv.Line(&visImg, image.Pt(D[3], 0), image.Pt(D[3], h), yellowColor, 3) + gocv.PutText(&visImg, "CUT2", image.Pt(D[3]+5, h-30), gocv.FontHersheyDuplex, 0.6, yellowColor, 2) + + // Save visualization (always - needed for debugging failures) + visFilename := fmt.Sprintf("review/f%d_%s_checks.png", 
frameCount, cleanName) + gocv.IMWrite(visFilename, visImg) + visImg.Close() + + // Clamp cut positions to valid range + if D[3] < 0 { + D[3] = 0 + } + if D[2] < 0 { + D[2] = 0 + } + + // Extract three digit regions + logMessage(Both, Debug, " [%s] Extracting digit1: region [0..%d] (width=%d)", displayName, D[2], D[2]) + digit1Region := thresh.Region(image.Rect(0, 0, D[2], h)) + digit1 := digit1Region.Clone() + digit1Region.Close() + + logMessage(Both, Debug, " [%s] Extracting digit2: region [%d..%d] (width=%d)", displayName, D[2], D[3], D[3]-D[2]) + digit2Region := thresh.Region(image.Rect(D[2], 0, D[3], h)) + digit2 := digit2Region.Clone() + digit2Region.Close() + + logMessage(Both, Debug, " [%s] Extracting digit3: region [%d..%d] (width=%d)", displayName, D[3], w, w-D[3]) + digit3Region := thresh.Region(image.Rect(D[3], 0, w, h)) + digit3 := digit3Region.Clone() + digit3Region.Close() + + // Match all three digits + matchStart := time.Now() + num1, score1 := matchDigit(digit1, templates) + num2, score2 := matchDigit(digit2, templates) + num3, score3 := matchDigit(digit3, templates) + logMessage(LogFile, Debug, " [TIMING][%s] matchDigit x3: %dms", displayName, time.Since(matchStart).Milliseconds()) + + // Validate '8' digits - check for two holes + if num1 == 8 && !validateEight(digit1, logger) { + logMessage(LogFile, Debug, " āš ļø Digit1 recognized as 8 but failed hole validation - marking invalid") + num1 = -1 + score1 = 0.0 + } + if num2 == 8 && !validateEight(digit2, logger) { + logMessage(LogFile, Debug, " āš ļø Digit2 recognized as 8 but failed hole validation - marking invalid") + num2 = -1 + score2 = 0.0 + } + if num3 == 8 && !validateEight(digit3, logger) { + logMessage(LogFile, Debug, " āš ļø Digit3 recognized as 8 but failed hole validation - marking invalid") + num3 = -1 + score3 = 0.0 + } + + // Validate '0' digits - check for center hole + if num1 == 0 && !validateZero(digit1, logger) { + logMessage(LogFile, Debug, " āš ļø Digit1 recognized as 0 
but failed hole validation - marking invalid") + num1 = -1 + score1 = 0.0 + } + if num2 == 0 && !validateZero(digit2, logger) { + logMessage(LogFile, Debug, " āš ļø Digit2 recognized as 0 but failed hole validation - marking invalid") + num2 = -1 + score2 = 0.0 + } + if num3 == 0 && !validateZero(digit3, logger) { + logMessage(LogFile, Debug, " āš ļø Digit3 recognized as 0 but failed hole validation - marking invalid") + num3 = -1 + score3 = 0.0 + } + + // Calculate final number + var number int + if digitIsOne[1] { + // 3-digit number: 1XX + number = 100 + num2*10 + num3 + } else { + // 2-digit number: XX (digit1 region is mostly empty/padding) + number = num2*10 + num3 + } + + // Save individual digits (only when SAVE_CROPS flag is set - for approving templates) + if SAVE_CROPS { + saveStart := time.Now() + digit1Filename := fmt.Sprintf("review/f%d_%s_digit1.png", frameCount, cleanName) + digit2Filename := fmt.Sprintf("review/f%d_%s_digit2.png", frameCount, cleanName) + digit3Filename := fmt.Sprintf("review/f%d_%s_digit3.png", frameCount, cleanName) + gocv.IMWrite(digit1Filename, digit1) + gocv.IMWrite(digit2Filename, digit2) + gocv.IMWrite(digit3Filename, digit3) + logMessage(LogFile, Debug, " [TIMING][%s] Save digit images: %dms", displayName, time.Since(saveStart).Milliseconds()) + } + + // Create labeled full image (combine all three digits) + combinedWidth := digit1.Cols() + digit2.Cols() + digit3.Cols() + combinedHeight := digit1.Rows() + if digit2.Rows() > combinedHeight { + combinedHeight = digit2.Rows() + } + if digit3.Rows() > combinedHeight { + combinedHeight = digit3.Rows() + } + + fullForLabel := gocv.NewMatWithSize(combinedHeight, combinedWidth, digit1.Type()) + + // Copy digit1 + digit1ROI := fullForLabel.Region(image.Rect(0, 0, digit1.Cols(), digit1.Rows())) + digit1.CopyTo(&digit1ROI) + digit1ROI.Close() + + // Copy digit2 + digit2ROI := fullForLabel.Region(image.Rect(digit1.Cols(), 0, digit1.Cols()+digit2.Cols(), digit2.Rows())) + 
digit2.CopyTo(&digit2ROI) + digit2ROI.Close() + + // Copy digit3 + digit3ROI := fullForLabel.Region(image.Rect(digit1.Cols()+digit2.Cols(), 0, combinedWidth, digit3.Rows())) + digit3.CopyTo(&digit3ROI) + digit3ROI.Close() + + // Add padding at top for label + labeledFull := gocv.NewMat() + gocv.CopyMakeBorder(fullForLabel, &labeledFull, 60, 0, 0, 0, gocv.BorderConstant, color.RGBA{0, 0, 0, 255}) + fullForLabel.Close() + + // Draw label + label := fmt.Sprintf("%d", number) + textColor := color.RGBA{255, 255, 255, 255} + gocv.PutText(&labeledFull, label, image.Pt(10, 45), gocv.FontHersheyDuplex, 1.8, textColor, 3) + + // Save labeled full image (always - needed for debugging failures) + fullFilename := fmt.Sprintf("review/f%d_%s_full.png", frameCount, cleanName) + gocv.IMWrite(fullFilename, labeledFull) + labeledFull.Close() + + digit1.Close() + digit2.Close() + digit3.Close() + thresh.Close() + + // Calculate widths for logging + digit3Width = DIGIT_ONE_WIDTH + if !digitIsOne[3] { + digit3Width = DIGIT_NON_ONE_WIDTH + } + digit2Width = DIGIT_ONE_WIDTH + if !digitIsOne[2] { + digit2Width = DIGIT_NON_ONE_WIDTH + } + + // Print debug info to log only + if digitIsOne[1] { + logMessage(LogFile, Info, " %s: 1%d%d [3-DIGIT: d1=1@%dpx (x=0-%d), d2=%s@%dpx (x=%d-%d), d3=%s@%dpx (x=%d-%d)] (scores: %d=%.0f%%, %d=%.0f%%, %d=%.0f%%)", + displayName, num2, num3, D[2], D[2], + map[bool]string{true: "1", false: "X"}[digitIsOne[2]], digit2Width, D[2], D[3], + map[bool]string{true: "1", false: "X"}[digitIsOne[3]], digit3Width, D[3], w, + num1, score1, num2, score2, num3, score3) + } else { + logMessage(LogFile, Info, " %s: %d%d [2-DIGIT: d2=%s@%dpx (x=%d-%d), d3=%s@%dpx (x=%d-%d)] (scores: %d=%.0f%%, %d=%.0f%%)", + displayName, num2, num3, + map[bool]string{true: "1", false: "X"}[digitIsOne[2]], digit2Width, D[2], D[3], + map[bool]string{true: "1", false: "X"}[digitIsOne[3]], digit3Width, D[3], w, + num2, score2, num3, score3) + } + + logMessage(LogFile, Debug, " [TIMING][%s] Total 
recognizeDisplayArea: %dms", displayName, time.Since(startTime).Milliseconds()) + + // Return using middle two digits for compatibility with existing code + return number, num2, score2, num3, score3 +} diff --git a/backups/backup_20251127/processor.go b/backups/backup_20251127/processor.go new file mode 100644 index 0000000..30a14a6 --- /dev/null +++ b/backups/backup_20251127/processor.go @@ -0,0 +1,285 @@ +package main + +import ( + "fmt" + "io" + "time" + + "gocv.io/x/gocv" +) + +// Processor handles the frame processing pipeline +type Processor struct { + templates map[int][]gocv.Mat + config *Config + logger io.Writer +} + +// NewProcessor creates a new processor +func NewProcessor(templates map[int][]gocv.Mat, config *Config, logger io.Writer) *Processor { + return &Processor{ + templates: templates, + config: config, + logger: logger, + } +} + +// buildReviewEntry creates a ReviewEntry from a Reading +func buildReviewEntry(reading *Reading, failed bool, failureReason string, unstable bool, unstableReason string) ReviewEntry { + return ReviewEntry{ + FrameNum: reading.FrameNum, + Timestamp: reading.Timestamp, + SpO2Value: reading.SpO2, + SpO2LeftDigit: reading.SpO2LeftDigit, + SpO2LeftConf: reading.SpO2LeftConf, + SpO2RightDigit: reading.SpO2RightDigit, + SpO2RightConf: reading.SpO2RightConf, + HRValue: reading.HR, + HRLeftDigit: reading.HRLeftDigit, + HRLeftConf: reading.HRLeftConf, + HRRightDigit: reading.HRRightDigit, + HRRightConf: reading.HRRightConf, + Failed: failed, + FailureReason: failureReason, + Unstable: unstable, + UnstableReason: unstableReason, + } +} + +// handleFrameFailure handles all failed frame processing: +// - Saves debug files (raw frame + layout visualization) +// - Creates and appends review entry +// - Calculates total timing +// - Returns ProcessingResult with given status +func handleFrameFailure( + rawFrame gocv.Mat, + normalized gocv.Mat, + layout *ScreenLayout, + reading *Reading, + status ProcessingStatus, + failureReason 
string, + state *ProcessingState, + timing *TimingData, + frameStartTime time.Time, +) ProcessingResult { + // Save debug files + fileIOStart := time.Now() + saveThresholdedFrame(rawFrame, reading.FrameNum) + saveLayoutVisualization(normalized, layout, fmt.Sprintf("review/f%d_boxes.jpg", reading.FrameNum)) + timing.FileIO = time.Since(fileIOStart).Milliseconds() + + // Create and append review entry + entry := buildReviewEntry(reading, true, failureReason, false, "") + state.ReviewEntries = append(state.ReviewEntries, entry) + if err := appendReviewEntry(entry); err != nil { + logMessage(LogFile, Warning, " Warning: Could not append to review HTML: %v", err) + } + + // Calculate total timing + timing.Total = time.Since(frameStartTime).Milliseconds() + + return ProcessingResult{ + Status: status, + Reading: *reading, + } +} + +// processFrame runs OCR on a normalized frame with valid layout +// Frame is already rotated and scaled by the caller +// rawFrame is the untouched frame from source (used for saving on errors) +func (p *Processor) processFrame(frame gocv.Mat, rawFrame gocv.Mat, frameNum int, state *ProcessingState, timing *TimingData) ProcessingResult { + timestamp := time.Now().Format("15:04:05.000") + frameStartTime := time.Now() + + logMessage(LogFile, Info, "Frame #%d", frameNum) + + // Frame is already scaled - use directly + normalized := frame + timing.Scale = 0 // No scaling in processor anymore + + // Run OCR + logMessage(LogFile, Info, " Recognizing displays...") + + spo2Start := time.Now() + spo2Val, spo2Left, spo2LeftConf, spo2Right, spo2RightConf := recognizeDisplayArea( + normalized, state.Layout.SpO2Area, p.templates, "SpO2", frameNum, p.logger) + timing.OCR_SpO2 = time.Since(spo2Start).Milliseconds() + + hrStart := time.Now() + hrVal, hrLeft, hrLeftConf, hrRight, hrRightConf := recognizeDisplayArea( + normalized, state.Layout.HRArea, p.templates, "HR", frameNum, p.logger) + timing.OCR_HR = time.Since(hrStart).Milliseconds() + + reading := 
Reading{ + SpO2: spo2Val, + SpO2LeftDigit: spo2Left, + SpO2LeftConf: spo2LeftConf, + SpO2RightDigit: spo2Right, + SpO2RightConf: spo2RightConf, + HR: hrVal, + HRLeftDigit: hrLeft, + HRLeftConf: hrLeftConf, + HRRightDigit: hrRight, + HRRightConf: hrRightConf, + Timestamp: timestamp, + FrameNum: frameNum, + } + + // HANDLER #1: Check for corruption - ANY -1 digit means invalid/corrupted + if reading.IsCorrupted() { + validationStart := time.Now() + timing.Validation += time.Since(validationStart).Milliseconds() + + logMessage(Both, Warning, " Frame #%d: Corruption - %s", + frameNum, reading.GetCorruptionDetails()) + + // Save extra debug files in DEBUG_MODE + if DEBUG_MODE { + debugTimestamp := time.Now().Format("20060102_150405") + gocv.IMWrite(fmt.Sprintf("test_output/corruption_%s_frame%d_normalized.png", debugTimestamp, frameNum), normalized) + saveLayoutVisualization(normalized, state.Layout, fmt.Sprintf("test_output/corruption_%s_frame%d_layout.jpg", debugTimestamp, frameNum)) + logMessage(LogFile, Info, " šŸ’¾ Debug saved: %s", debugTimestamp) + } + + return handleFrameFailure(rawFrame, normalized, state.Layout, &reading, StatusCorrupted, + fmt.Sprintf("Corruption: %s", reading.GetCorruptionDetails()), state, timing, frameStartTime) + } + + // HANDLER #2: Check for unrecognized digits (negative values) + if reading.SpO2 < 0 || reading.HR < 0 { + validationStart := time.Now() + timing.Validation += time.Since(validationStart).Milliseconds() + + logMessage(LogFile, Info, " [UNRECOGNIZED] SpO2=%d, HR=%d - treating as low confidence", reading.SpO2, reading.HR) + + return handleFrameFailure(rawFrame, normalized, state.Layout, &reading, StatusLowConfidence, + fmt.Sprintf("Unrecognized: SpO2=%d, HR=%d", reading.SpO2, reading.HR), state, timing, frameStartTime) + } + + // Check if values changed + validationStart := time.Now() + valuesChanged := (reading.SpO2 != state.LastPosted.SpO2 || reading.HR != state.LastPosted.HR) + timing.Validation += 
time.Since(validationStart).Milliseconds() + + if !valuesChanged { + timing.Total = time.Since(frameStartTime).Milliseconds() + logMessage(LogFile, Debug, " Frame #%d: SpO2=%d%%, HR=%d bpm (no change) - processed in %dms", + frameNum, reading.SpO2, reading.HR, timing.Total) + return ProcessingResult{Status: StatusNoChange, Reading: reading} + } + + // Values changed - update state and log + state.LastPosted = reading + logMessage(Both, Info, "SpO2=%d%%, HR=%d bpm", reading.SpO2, reading.HR) + + // HANDLER #3: Check confidence + validationStart = time.Now() + action, _ := validateConfidence(&reading, state, p.logger) + timing.Validation += time.Since(validationStart).Milliseconds() + + if action == ActionRetry { + state.LowConfidenceCount++ + logMessage(Both, Warning, " Low confidence (#%d) - grabbing next frame immediately...", state.LowConfidenceCount) + + spo2Avg, hrAvg := reading.AvgConfidence() + return handleFrameFailure(rawFrame, normalized, state.Layout, &reading, StatusLowConfidence, + fmt.Sprintf("Low confidence: SpO2 %.1f%%, HR %.1f%%", spo2Avg, hrAvg), state, timing, frameStartTime) + } + + if action == ActionDiscard { + state.LowConfidenceCount++ + logMessage(Both, Warning, " Low confidence after retry (#%d)", state.LowConfidenceCount) + + spo2Avg, hrAvg := reading.AvgConfidence() + return handleFrameFailure(rawFrame, normalized, state.Layout, &reading, StatusLowConfidence, + fmt.Sprintf("Low confidence: SpO2 %.1f%%, HR %.1f%%", spo2Avg, hrAvg), state, timing, frameStartTime) + } + + // HANDLER #4: Check stability + validationStart = time.Now() + action, reason := validateStability(&reading, state, p.logger) + timing.Validation += time.Since(validationStart).Milliseconds() + + // Check for physiologically impossible values (< 40) + if reading.SpO2 < 40 || reading.HR < 40 { + timing.HASS = 0 + logMessage(Both, Warning, " Invalid physiological values: SpO2=%d, HR=%d (below 40) - not posting to HASS", + reading.SpO2, reading.HR) + + entry := 
buildReviewEntry(&reading, false, "", false, "") + state.ReviewEntries = append(state.ReviewEntries, entry) + if err := appendReviewEntry(entry); err != nil { + logMessage(LogFile, Warning, " Warning: Could not append to review HTML: %v", err) + } + + timing.Total = time.Since(frameStartTime).Milliseconds() + return ProcessingResult{Status: StatusSuccess, Reading: reading, ShouldPost: false} + } + + // Handle stability action + if action == ActionHold { + timing.HASS = 0 + entry := buildReviewEntry(&reading, false, "", true, reason) + state.ReviewEntries = append(state.ReviewEntries, entry) + if err := appendReviewEntry(entry); err != nil { + logMessage(LogFile, Warning, " Warning: Could not append to review HTML: %v", err) + } + + logMessage(LogFile, Warning, " %s - holding for validation", reason) + timing.Total = time.Since(frameStartTime).Milliseconds() + return ProcessingResult{Status: StatusUnstable, Reading: reading, ShouldPost: false, UnstableReason: reason} + } + + // SUCCESS - ActionPost or default + timing.HASS = 0 // Will be updated in main loop + entry := buildReviewEntry(&reading, false, "", false, "") + state.ReviewEntries = append(state.ReviewEntries, entry) + if err := appendReviewEntry(entry); err != nil { + logMessage(LogFile, Warning, " Warning: Could not append to review HTML: %v", err) + } + + timing.Total = time.Since(frameStartTime).Milliseconds() + return ProcessingResult{Status: StatusSuccess, Reading: reading, ShouldPost: action == ActionPost} +} + +// postReading posts a reading to Home Assistant +func (p *Processor) postReading(reading *Reading, state *ProcessingState) error { + // Skip posting if no config (single-frame test mode) + if p.config == nil { + logMessage(LogFile, Info, " ā“˜ Skipping HASS post (test mode)") + return nil + } + + spo2Err := postToHomeAssistant(p.config, "sensor.pulse_ox_spo2", reading.SpO2, "%", "SpO2") + hrErr := postToHomeAssistant(p.config, "sensor.pulse_ox_hr", reading.HR, "bpm", "Heart Rate") + + if 
spo2Err == nil && hrErr == nil { + state.SuccessCount++ + logMessage(LogFile, Info, " āœ“ Posted successfully (success: %d, fail: %d)", + state.SuccessCount, state.FailureCount) + return nil + } + + state.FailureCount++ + if spo2Err != nil { + logMessage(LogFile, Error, " āŒ SpO2 post error: %v", spo2Err) + } + if hrErr != nil { + logMessage(LogFile, Error, " āŒ HR post error: %v", hrErr) + } + logMessage(LogFile, Info, " (success: %d, fail: %d)", state.SuccessCount, state.FailureCount) + logMessage(Both, Error, " āŒ Post failed") + + if spo2Err != nil { + return spo2Err + } + return hrErr +} + +// saveThresholdedFrame saves the RAW thresholded frame +// Frame is ALREADY thresholded binary (from acquisition), just save it directly +// Useful for debugging and can be tested with ./pulseox-monitor raw_frames/thresh_*.png +func saveThresholdedFrame(frame gocv.Mat, frameNum int) { + filename := fmt.Sprintf("raw_frames/thresh_%s-%05d.png", time.Now().Format("20060102"), frameNum) + gocv.IMWrite(filename, frame) +} diff --git a/backups/backup_20251127/pulseox-monitor.go b/backups/backup_20251127/pulseox-monitor.go new file mode 100644 index 0000000..2003fb1 --- /dev/null +++ b/backups/backup_20251127/pulseox-monitor.go @@ -0,0 +1,534 @@ +package main + +import ( + "fmt" + "os" + "os/signal" + "syscall" + "time" + + "gocv.io/x/gocv" +) + +const VERSION = "v3.60" + +// Global debug flag +var DEBUG_MODE = false + +// Global timing flag +var TIMING_MODE = false + +// Global save crops flag +var SAVE_CROPS = false + +// Display and digit measurement constants +const ( + CUT_WIDTH = 280 + DIGIT_ONE_WIDTH = 72 + DIGIT_NON_ONE_WIDTH = 100 + MIN_BOX_HEIGHT = 110 +) + +func main() { + // Check for /help flag first + for _, arg := range os.Args[1:] { + if arg == "/help" || arg == "--help" || arg == "-h" { + showHelp() + return + } + } + + logMessage(Console, Info, "=== Pulse-Ox Monitor %s (Unified Detection) ===", VERSION) + logMessage(Console, Info, "") + + // Limit OpenCV 
thread pool to reduce CPU overhead + // Small images (280x200px) don't benefit from multi-threading + gocv.SetNumThreads(1) + logMessage(Console, Info, "šŸ”§ OpenCV threads limited to 1 (single-threaded for minimal overhead)") + logMessage(Console, Info, "") + + // Check for flags + for _, arg := range os.Args[1:] { + if arg == "/debug" { + DEBUG_MODE = true + DEBUG = true // Also enable detection.go debug + logMessage(Console, Info, "šŸ› DEBUG MODE ENABLED") + } else if arg == "/timing" { + TIMING_MODE = true + logMessage(Console, Info, "ā±ļø TIMING MODE ENABLED") + } else if arg == "/crops" { + SAVE_CROPS = true + logMessage(Console, Info, "āœ‚ļø SAVE CROPS ENABLED (individual digit images)") + } + } + + // Check if running in single-frame test mode + if len(os.Args) >= 2 { + // Filter out flags + framePath := "" + for _, arg := range os.Args[1:] { + if arg != "/debug" && arg != "/timing" && arg != "/crops" { + framePath = arg + break + } + } + if framePath != "" { + runSingleFrameMode(framePath) + return + } + } + + // Normal streaming mode + runStreamingMode() +} + +func showHelp() { + logMessage(Console, Info, "=== Pulse-Ox Monitor %s ===", VERSION) + logMessage(Console, Info, "") + logMessage(Console, Info, "USAGE:") + logMessage(Console, Info, " pulseox-monitor [options] [framefile]") + logMessage(Console, Info, "") + logMessage(Console, Info, "OPTIONS:") + logMessage(Console, Info, " /help Show this help message") + logMessage(Console, Info, " /debug Enable debug mode (extra diagnostic output)") + logMessage(Console, Info, " /timing Show timing table for performance analysis") + logMessage(Console, Info, " /crops Save individual digit crop images (for approving templates)") + logMessage(Console, Info, "") + logMessage(Console, Info, "MODES:") + logMessage(Console, Info, " No arguments - Run in streaming mode (RTSP camera)") + logMessage(Console, Info, " [framefile] - Test mode: process single PNG file") + logMessage(Console, Info, "") + 
logMessage(Console, Info, "EXAMPLES:") + logMessage(Console, Info, " pulseox-monitor # Normal streaming") + logMessage(Console, Info, " pulseox-monitor /crops # Streaming with digit crops") + logMessage(Console, Info, " pulseox-monitor /timing # Show performance timing") + logMessage(Console, Info, " pulseox-monitor raw_frames/thresh_*.png # Test single frame") + logMessage(Console, Info, " pulseox-monitor /debug /crops # Multiple flags") + logMessage(Console, Info, "") + logMessage(Console, Info, "OUTPUT:") + logMessage(Console, Info, " review/ - Processed frame images and review.html") + logMessage(Console, Info, " raw_frames/ - Failed recognition frames (for debugging)") + logMessage(Console, Info, " test_output/ - Layout detection debug images") + logMessage(Console, Info, " pulse-monitor_*.log - Detailed execution log") + logMessage(Console, Info, "") +} + +func runSingleFrameMode(framePath string) { + logMessage(Console, Info, "=== Single Frame Test Mode ===") + logMessage(Console, Info, "Loading frame: %s", framePath) + logMessage(Console, Info, "") + + // Automatically enable DEBUG_MODE and SAVE_CROPS for file processing + DEBUG_MODE = true + DEBUG = true // detection.go debug flag + SAVE_CROPS = true + logMessage(Console, Info, "šŸ› DEBUG MODE AUTO-ENABLED (file processing)") + logMessage(Console, Info, "āœ‚ļø SAVE CROPS AUTO-ENABLED (file processing)") + logMessage(Console, Info, "") + + // In test mode, all output goes to console only (no separate file log) + // Leave globalLogger as nil to avoid duplication when using Both target + globalLogger = nil + + // Setup + os.MkdirAll("test_output", 0755) + os.MkdirAll("review", 0755) + + // Load templates + logMessage(Console, Info, "Loading templates...") + templates, err := loadTemplates() + if err != nil { + logMessage(Console, Error, "āŒ Error loading templates: %v", err) + return + } + defer closeTemplates(templates) + logMessage(Console, Info, "āœ“ Templates loaded") + logMessage(Console, Info, "") + 
+ // Create frame source + source := NewFileSource(framePath) + defer source.Close() + + // Create processor + processor := NewProcessor(templates, nil, nil) + state := NewProcessingState() + + // Process single frame with unified loop + processFrames(source, processor, state) + + logMessage(Console, Info, "") + logMessage(Console, Info, "āœ“ Single frame test complete") + logMessage(Console, Info, "") + logMessage(Console, Info, "=== OUTPUT FILES ===") + logMessage(Console, Info, "Test outputs:") + logMessage(Console, Info, " test_output/layout_boxes.jpg - Layout visualization") + logMessage(Console, Info, " review/f*_spo2_full.png - SpO2 recognition") + logMessage(Console, Info, " review/f*_hr_full.png - HR recognition") +} + +func runStreamingMode() { + // Create log file + logFilename := fmt.Sprintf("pulse-monitor_%s.log", time.Now().Format("20060102_150405")) + logFile, err := os.OpenFile(logFilename, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) + if err != nil { + logMessage(Console, Warning, "Warning: Could not create log file: %v", err) + logMessage(Console, Info, "Continuing without file logging...") + logMessage(Console, Info, "") + globalLogger = nil + } else { + defer logFile.Close() + globalLogger = logFile + logMessage(Both, Info, "šŸ“ Logging to: %s", logFilename) + } + + // Setup directories - clean output directories in streaming mode + logMessage(Console, Info, "šŸ—‘ļø Cleaning output directories...") + + logMessage(Console, Info, " - review/... āœ“") + os.RemoveAll("review") + + logMessage(Console, Info, " - raw_frames/... āœ“") + os.RemoveAll("raw_frames") + + logMessage(Console, Info, " - test_output/... 
āœ“") + os.RemoveAll("test_output") + + os.MkdirAll("review", 0755) + os.MkdirAll("raw_frames", 0755) + os.MkdirAll("test_output", 0755) + logMessage(Console, Info, "") + + // Initialize live review HTML + if err := initReviewHTML(); err != nil { + logMessage(Console, Warning, "Warning: Could not initialize review HTML: %v", err) + } else { + logMessage(Console, Info, "āœ… Live review HTML initialized: review/review.html (refresh browser to see updates)") + } + + // Load config + config, err := LoadConfig("config.yaml") + if err != nil { + logMessage(Console, Error, "Error loading config: %v", err) + return + } + + // Load templates + templates, err := loadTemplates() + if err != nil { + logMessage(Console, Error, "Error loading templates: %v", err) + return + } + defer closeTemplates(templates) + + // Initialize timestamp OCR client (reusable) + InitTimestampOCR() + defer CloseTimestampOCR() + + logMessage(Console, Info, "šŸ“Š All processed frames saved to review/") + logMessage(Console, Info, " Press Ctrl+C to stop and generate review.html") + logMessage(Console, Info, "") + + // Create RTSP source with reconnection handling + logMessage(Console, Info, "Connecting to RTSP stream...") + var source *RTSPSource + for { + source, err = NewRTSPSource(config.Camera.RTSPURL) + if err == nil { + break + } + logMessage(Console, Warning, "Failed to connect: %v", err) + logMessage(Console, Info, "Retrying in 5 seconds...") + time.Sleep(5 * time.Second) + } + defer source.Close() + + logMessage(Console, Info, "āœ“ Connected! 
Press Ctrl+C to stop") + logMessage(Console, Info, "Posting to HASS: %s", config.HomeAssistant.URL) + logMessage(Console, Info, "") + + // Setup signal handler + sigChan := make(chan os.Signal, 1) + signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM) + go func() { + <-sigChan + logMessage(Console, Info, "") + logMessage(Console, Info, "") + logMessage(Console, Info, "šŸ›‘ Received stop signal, finishing up...") + source.Close() // Safe to call multiple times now (sync.Once) + }() + + // Create processor + processor := NewProcessor(templates, config, logFile) + state := NewProcessingState() + + // Run unified processing loop + processFrames(source, processor, state) + + // Close review HTML + logMessage(Console, Info, "") + logMessage(Console, Info, "šŸ“ Closing review.html...") + if err := closeReviewHTML(); err != nil { + logMessage(Console, Error, "Error closing review HTML: %v", err) + } else { + logMessage(Console, Info, "āœ“ Review page completed: review/review.html (%d frames)", len(state.ReviewEntries)) + logMessage(Console, Info, " Open it in browser to review recognition results") + } +} + +// interruptibleSleep sleeps for the specified duration, but checks every second if source is closed +func interruptibleSleep(source FrameSource, duration time.Duration) { + remaining := duration + for remaining > 0 { + sleepTime := time.Second + if remaining < sleepTime { + sleepTime = remaining + } + time.Sleep(sleepTime) + remaining -= sleepTime + + // Check if source was closed (Ctrl+C pressed) + if !source.IsActive() { + return + } + } +} + +// processFrames is the UNIFIED processing loop - works for both RTSP and single-frame +func processFrames(source FrameSource, processor *Processor, state *ProcessingState) { + logMessage(Console, Info, "Processing frames from: %s", source.Name()) + logMessage(Console, Info, "") + + for { + loopStart := time.Now() + logMessage(Console, Debug, "[LOOP] Starting iteration") + + // Initialize timing data + var timing TimingData 
+ acquireStart := time.Now() + + // Read raw frame from source + var frame gocv.Mat + var shouldContinue bool + var err error + + if state.ConsecutiveFailures == 1 { + // First failure: grab IMMEDIATE next frame (bypass skip) + logMessage(Console, Debug, " Using NextImmediate() due to consecutive failure") + frame, shouldContinue, err = source.NextImmediate() + } else { + // Normal operation or 2nd+ failure: use normal skip + frame, shouldContinue, err = source.Next() + } + timing.Acquire = time.Since(acquireStart).Milliseconds() + logMessage(Console, Debug, "[LOOP] Frame acquired in %dms", timing.Acquire) + + if err != nil { + logMessage(Both, Error, "Error reading frame: %v", err) + if !shouldContinue { + break + } + continue + } + if frame.Empty() { + if !shouldContinue { + break + } + continue + } + + // Check timestamp every 10 processed frames (BEFORE preprocessing, on colored frame) + state.ProcessedCount++ + state.TimestampCheckCounter++ + if state.TimestampCheckCounter >= 10 { + state.TimestampCheckCounter = 0 + diff, _, err := extractTimestamp(frame) + if err != nil { + logMessage(Both, Warning, " Timestamp OCR failed: %v", err) + } else if diff > 3 { + logMessage(Both, Warning, " Camera timestamp lag: %ds behind server", diff) + } else if diff < -3 { + logMessage(Both, Warning, " Camera timestamp ahead: %ds ahead of server", -diff) + } + // Silent if drift is within ±3 seconds + } + + // Save truly raw frame (before any preprocessing) - threshold to reduce file size + rawFrameClone := frame.Clone() + rawGray := gocv.NewMat() + gocv.CvtColor(rawFrameClone, &rawGray, gocv.ColorBGRToGray) + rawThresholded := gocv.NewMat() + gocv.Threshold(rawGray, &rawThresholded, 240, 255, gocv.ThresholdBinary) + rawGray.Close() + rawFrameClone.Close() + + // Preprocess frame: crop timestamp + rotate 90° (stays colored for now) + preprocessStart := time.Now() + logMessage(Console, Debug, " Before preprocess: %dx%d", frame.Cols(), frame.Rows()) + preprocessed := 
preprocessFrame(frame) + logMessage(Console, Debug, " After preprocess: %dx%d", preprocessed.Cols(), preprocessed.Rows()) + frame.Close() + timing.Preprocess = time.Since(preprocessStart).Milliseconds() + + // Threshold to binary (240 threshold) + thresholdStart := time.Now() + gray := gocv.NewMat() + gocv.CvtColor(preprocessed, &gray, gocv.ColorBGRToGray) + preprocessed.Close() + binary := gocv.NewMat() + gocv.Threshold(gray, &binary, 240, 255, gocv.ThresholdBinary) + gray.Close() + timing.Threshold = time.Since(thresholdStart).Milliseconds() + logMessage(Console, Debug, "[LOOP] Threshold in %dms", timing.Threshold) + + // ========== DETECTION (if needed) ========== + if state.NeedsDetection() { + logMessage(Both, Info, "šŸ” Running detection...") + + result := DetectRotationAndWidth(binary) + + if !result.Success { + // Detection failed + state.ConsecutiveFailures++ + binary.Close() + rawThresholded.Close() + + // Escalation strategy + if state.ConsecutiveFailures == 1 { + logMessage(Both, Warning, " Detection failed (1st try) - trying next frame...") + } else if state.ConsecutiveFailures == 2 { + logMessage(Both, Warning, " Detection failed (2nd try) - trying next frame...") + } else if state.ConsecutiveFailures == 3 { + logMessage(Both, Warning, " Detection failed (3rd try) - waiting 10s...") + interruptibleSleep(source, 10*time.Second) + } else if state.ConsecutiveFailures == 4 { + logMessage(Both, Warning, " Detection failed (4th try) - waiting 30s...") + interruptibleSleep(source, 30*time.Second) + } else { + logMessage(Both, Info, " ā³ Pulse oximeter not detected (day mode) - waiting 60s...") + interruptibleSleep(source, 60*time.Second) + } + + if !shouldContinue { + break + } + continue + } + + // Detection succeeded - store results + state.LockedRotation = result.Rotation + state.LockedScale = result.ScaleFactor + state.Layout = &ScreenLayout{ + SpO2Area: result.SpO2, + HRArea: result.HR, + } + state.LayoutValid = true + state.ConsecutiveFailures = 0 + 
+ logMessage(Both, Info, "āœ“ Detection complete:") + logMessage(Both, Info, " Rotation: %.3f°", result.Rotation) + logMessage(Both, Info, " Scale: %.3f (width %dpx → 860px)", result.ScaleFactor, result.Width) + logMessage(Both, Info, " SpO2: X[%d-%d] Y[%d-%d]", + result.SpO2.Min.X, result.SpO2.Max.X, result.SpO2.Min.Y, result.SpO2.Max.Y) + logMessage(Both, Info, " HR: X[%d-%d] Y[%d-%d]", + result.HR.Min.X, result.HR.Max.X, result.HR.Min.Y, result.HR.Max.Y) + } + + // ========== APPLY TRANSFORMS ========== + // Rotate + var rotated gocv.Mat + if state.LockedRotation != 0.0 { + rotated = RotateImage(binary, state.LockedRotation) + binary.Close() + } else { + rotated = binary + } + + // Scale + var scaled gocv.Mat + if state.LockedScale != 1.0 { + scaled = ScaleByFactor(rotated, state.LockedScale) + rotated.Close() + } else { + scaled = rotated + } + + // Validate frame dimensions vs layout coordinates + if state.Layout != nil { + logMessage(Both, Debug, "Frame size: %dx%d", scaled.Cols(), scaled.Rows()) + logMessage(Both, Debug, "SpO2 area: X[%d-%d] Y[%d-%d]", + state.Layout.SpO2Area.Min.X, state.Layout.SpO2Area.Max.X, + state.Layout.SpO2Area.Min.Y, state.Layout.SpO2Area.Max.Y) + logMessage(Both, Debug, "HR area: X[%d-%d] Y[%d-%d]", + state.Layout.HRArea.Min.X, state.Layout.HRArea.Max.X, + state.Layout.HRArea.Min.Y, state.Layout.HRArea.Max.Y) + + // Check bounds + if state.Layout.SpO2Area.Max.X > scaled.Cols() || state.Layout.SpO2Area.Max.Y > scaled.Rows() || + state.Layout.HRArea.Max.X > scaled.Cols() || state.Layout.HRArea.Max.Y > scaled.Rows() { + logMessage(Both, Error, "Layout coordinates exceed frame dimensions! 
Resetting detection.") + state.ResetDetection() + scaled.Close() + rawThresholded.Close() + continue + } + } + + // ========== PROCESS FRAME ========== + logMessage(Console, Debug, "[LOOP] Starting OCR") + timing.FrameNum = state.ProcessedCount + result := processor.processFrame(scaled, rawThresholded, state.ProcessedCount, state, &timing) + logMessage(Console, Debug, "[LOOP] OCR complete, status=%d", result.Status) + scaled.Close() + rawThresholded.Close() + + // Print timing table (header every 20 frames) + state.TimingFrameCount++ + showHeader := (state.TimingFrameCount % 20) == 1 + printTimingTable(timing, showHeader) + + // Handle result + switch result.Status { + case StatusSuccess: + state.ConsecutiveFailures = 0 + state.LowConfidenceCount = 0 // Reset on success + if result.ShouldPost { + hassStart := time.Now() + processor.postReading(&result.Reading, state) + timing.HASS = time.Since(hassStart).Milliseconds() + timing.Total += timing.HASS + } + + case StatusCorrupted: + state.ConsecutiveFailures++ + if state.ConsecutiveFailures == 2 { + logMessage(Both, Warning, " Re-detecting layout...") + state.ResetDetection() + } + + case StatusLowConfidence: + // Low confidence is a template matching issue, NOT a layout issue + // Counter already incremented in processor.go + // Don't re-detect - just continue + + case StatusNoChange: + // Normal - don't increment failure counter + + case StatusUnstable: + // Held for validation - don't increment failure counter + } + + if !shouldContinue { + break + } + + logMessage(Console, Debug, "[LOOP] Iteration complete in %dms", time.Since(loopStart).Milliseconds()) + } +} + +func closeTemplates(templates map[int][]gocv.Mat) { + for _, templateList := range templates { + for _, t := range templateList { + t.Close() + } + } +} diff --git a/backups/backup_20251127/test_rotation.go b/backups/backup_20251127/test_rotation.go new file mode 100644 index 0000000..8cc164f --- /dev/null +++ b/backups/backup_20251127/test_rotation.go @@ 
-0,0 +1,230 @@ +//go:build ignore +// +build ignore + +// This is a standalone test program for rotation detection. +// Run with: go run test_rotation.go detection.go + +package main + +import ( + "fmt" + "log" + "os" + + "gocv.io/x/gocv" +) + +// DEBUG flag for this test program (separate from main build) +// Note: When running this test, set DEBUG = true to enable debug output in detection.go +var DEBUG = false + +func main() { + // Parse command line args + if len(os.Args) > 1 && os.Args[1] == "/debug" { + DEBUG = true + fmt.Println("DEBUG mode enabled\n") + } + + // Hardcode RTSP URL + streamURL := "rtsp://tapohass:!!Helder06@192.168.2.183:554/stream1" + + fmt.Println("Rotation Detection System") + fmt.Println("========================") + fmt.Println("Processing frames...\n") + + // Open RTSP stream + stream, err := gocv.OpenVideoCapture(streamURL) + if err != nil { + log.Fatalf("Failed to open stream: %v", err) + } + defer stream.Close() + + // Initialize detection state + rotation := 9999.0 // Sentinel value for "not detected" + width := 0 // 0 means "not detected" + scaleFactor := 0.0 // Scale factor from detection + + // Frame processing counters + framesSkipped := 0 + framesProcessed := 0 + detectionAttempts := 0 + totalFramesRead := 0 + SKIP_COUNT := 7 // Process every 8th frame + MAX_DETECTION_ATTEMPTS := 100 // Give up after 100 attempts + RE_DETECT_AFTER := 0 // Re-detect after N frames (0 = disabled) + + // Main processing loop + frame := gocv.NewMat() + defer frame.Close() + + for { + // Acquire a frame + if ok := stream.Read(&frame); !ok || frame.Empty() { + log.Fatal("Failed to read frame from stream") + } + totalFramesRead++ + + // Preprocess frame to binary + binary := PreprocessFrame(frame) + + // If rotation is 9999 or width is 0, detect bands/width/rotation + if rotation == 9999 || width == 0 || scaleFactor == 0.0 { + detectionAttempts++ + + // Give up if too many attempts + if detectionAttempts > MAX_DETECTION_ATTEMPTS { + fmt.Printf("āŒ 
Detection failed after %d attempts. Exiting.\n", MAX_DETECTION_ATTEMPTS) + break + } + + result := DetectRotationAndWidth(binary) + if result.Success { + rotation = result.Rotation + width = result.Width + scaleFactor = result.ScaleFactor + fmt.Printf("āœ“ DETECTION SUCCESSFUL (frame %d, attempt #%d):\n", totalFramesRead, detectionAttempts) + fmt.Printf(" Width=%dpx, Rotation=%.3f°, Scale=%.3f\n\n", width, rotation, scaleFactor) + detectionAttempts = 0 // Reset counter + } else { + // Show detection attempts with frame info + if detectionAttempts <= 5 { + if DEBUG { + fmt.Printf("Frame %d: Detection attempt #%d failed\n", totalFramesRead, detectionAttempts) + } + } else { + fmt.Printf("⚠ Frame %d: Detection attempt #%d failed - retrying...\n", totalFramesRead, detectionAttempts) + } + // Clean up and continue to next frame + binary.Close() + + // Skip a few frames to ensure display has changed + for skip := 0; skip < 3; skip++ { + if ok := stream.Read(&frame); !ok { + break + } + } + continue + } + } + + // If rotation != 9999 and width != 0 and scaleFactor != 0.0 and we've skipped enough frames + if rotation != 9999 && width != 0 && scaleFactor != 0.0 && framesSkipped >= SKIP_COUNT { + framesProcessed++ + + // Apply the same rotation correction to this frame + // (rotation was detected on a previous frame, now apply it to this one) + rotated := RotateImage(binary, rotation) + defer rotated.Close() + + // Save rotated frame BEFORE scaling + if DEBUG && framesProcessed == 1 { + gocv.IMWrite("debug_frame_001_AFTER_ROTATION_BEFORE_SCALE.png", rotated) + fmt.Println("DEBUG: Saved rotated frame before scaling") + } + + // Apply the same scaling using detected scale factor + // This scales the entire frame so the baseline becomes 860px + scaled := ScaleByFactor(rotated, scaleFactor) + defer scaled.Close() + + fmt.Printf("Frame #%d processed: Rotated %.3f°, Scaled to %dx%d (factor: %.3f, baseline: %dpx -> 860px)\n", + framesProcessed, rotation, scaled.Cols(), 
scaled.Rows(), scaleFactor, width) + + // Re-detect bands on scaled image for visualization + if DEBUG && framesProcessed <= 5 { + // Use the detected scale factor for minimum height + scaledMinHeight := int(80.0 * scaleFactor) + bands := DetectBands(scaled, scaledMinHeight) + + if len(bands) >= 2 { + // Find tallest band (graph) + tallestIdx := 0 + tallestHeight := 0 + for i, band := range bands { + height := band.maxY - band.minY + if height > tallestHeight { + tallestHeight = height + tallestIdx = i + } + } + + if tallestIdx < len(bands)-1 { + graphBand := bands[tallestIdx] + digitBand := bands[tallestIdx+1] + + // Extract digit region + digitRegion := ExtractBandRegion(scaled, digitBand) + defer digitRegion.Close() + + // Find digit boxes + digitBoxes := FindDigitBoxes(digitRegion, scaledMinHeight) + + if DEBUG { + fmt.Printf(" DEBUG: FindDigitBoxes returned %d boxes:\n", len(digitBoxes)) + for i, box := range digitBoxes { + fmt.Printf(" Box %d: X[%d-%d] Y[%d-%d] Size: %dx%d\n", + i+1, box.Min.X, box.Max.X, box.Min.Y, box.Max.Y, + box.Dx(), box.Dy()) + } + } + + // Calculate center from scaled baseline points + // Use the same proportion as detected + scaledCenterX := int(float64(scaled.Cols()) / 2.0) + + // Merge into displays + displayAreas := mergeDigitBoxesWithCenter(digitRegion, digitBoxes, scaledCenterX) + + fmt.Println(" DEBUG: Saving visualizations...") + + // Visualize bands + VisualizeSimpleBands(scaled, graphBand, digitBand, + fmt.Sprintf("debug_frame_%03d_bands.png", framesProcessed)) + + // Save digit region + gocv.IMWrite(fmt.Sprintf("debug_frame_%03d_digits.png", framesProcessed), digitRegion) + + // Visualize digit boxes + VisualizeDigitBoxes(digitRegion, digitBoxes, digitBand.minY, + fmt.Sprintf("debug_frame_%03d_digitboxes.png", framesProcessed)) + + // Visualize merged displays + VisualizeDetectedDisplays(digitRegion, displayAreas, digitBoxes, scaledCenterX, + fmt.Sprintf("debug_frame_%03d_merged.png", framesProcessed)) + + fmt.Printf(" 
DEBUG: Saved 4 visualization files for frame %d\n", framesProcessed) + } + } + } + + // Save scaled frame if in DEBUG mode + if DEBUG && framesProcessed <= 5 { + filename := fmt.Sprintf("debug_frame_%03d_scaled.png", framesProcessed) + gocv.IMWrite(filename, scaled) + fmt.Printf("DEBUG: Saved %s\n", filename) + } + + // TODO: Add OCR processing on 'scaled' image here in later phase + // Display areas (SpO2 and HR) are already detected and stored in result + // They will be available when we integrate this with the main monitoring system + + framesSkipped = 0 + + // Optional: Force re-detection after N frames to adapt to changes + if RE_DETECT_AFTER > 0 && framesProcessed >= RE_DETECT_AFTER { + fmt.Printf("\nšŸ”„ Re-detecting after %d frames...\n", RE_DETECT_AFTER) + rotation = 9999 + width = 0 + scaleFactor = 0.0 + framesProcessed = 0 + } + } else { + framesSkipped++ + } + + // Clean up + binary.Close() + } + + fmt.Println("\nProgram terminated.") +} diff --git a/backups/backup_20251127/timestamp_ocr.go b/backups/backup_20251127/timestamp_ocr.go new file mode 100644 index 0000000..42c95c4 --- /dev/null +++ b/backups/backup_20251127/timestamp_ocr.go @@ -0,0 +1,109 @@ +package main + +import ( + "fmt" + "image" + "regexp" + "strconv" + "strings" + "time" + + "github.com/otiai10/gosseract/v2" + "gocv.io/x/gocv" +) + +// Global reusable OCR client +var timestampOCRClient *gosseract.Client + +// InitTimestampOCR initializes the reusable OCR client +func InitTimestampOCR() { + timestampOCRClient = gosseract.NewClient() + timestampOCRClient.SetPageSegMode(gosseract.PSM_SINGLE_LINE) + timestampOCRClient.SetWhitelist("0123456789-: ") +} + +// CloseTimestampOCR closes the OCR client +func CloseTimestampOCR() { + if timestampOCRClient != nil { + timestampOCRClient.Close() + } +} + +// extractTimestamp extracts and OCRs the timestamp from the top of the frame +// Returns: (secondsDifference, ocrDurationMs, error) +func extractTimestamp(frame gocv.Mat) (int, int64, error) { + 
startTime := time.Now() + + // Extract 1024x68 region + timestampRegion := frame.Region(image.Rect(0, 0, 1024, 68)) + defer timestampRegion.Close() + + // Downscale 50% (reduces OCR processing time) + downscaled := gocv.NewMat() + defer downscaled.Close() + gocv.Resize(timestampRegion, &downscaled, image.Point{512, 34}, 0, 0, gocv.InterpolationArea) + + // Grayscale + gray := gocv.NewMat() + defer gray.Close() + gocv.CvtColor(downscaled, &gray, gocv.ColorBGRToGray) + + // Binary threshold (Otsu automatically finds best threshold) + binary := gocv.NewMat() + defer binary.Close() + gocv.Threshold(gray, &binary, 0, 255, gocv.ThresholdBinary|gocv.ThresholdOtsu) + + // Invert (white text on black → black text on white for better OCR) + inverted := gocv.NewMat() + defer inverted.Close() + gocv.BitwiseNot(binary, &inverted) + + // Encode as PNG (lossless, better for text) + buf, err := gocv.IMEncode(".png", inverted) + if err != nil { + return 0, time.Since(startTime).Milliseconds(), fmt.Errorf("failed to encode: %w", err) + } + defer buf.Close() + + // OCR with REUSED client (major speed boost) + err = timestampOCRClient.SetImageFromBytes(buf.GetBytes()) + if err != nil { + return 0, time.Since(startTime).Milliseconds(), fmt.Errorf("failed to set image: %w", err) + } + + text, err := timestampOCRClient.Text() + if err != nil { + return 0, time.Since(startTime).Milliseconds(), fmt.Errorf("OCR failed: %w", err) + } + + // Parse timestamp + text = strings.TrimSpace(text) + text = strings.ReplaceAll(text, "\n", "") + + // Pattern: YYYY-MM-DD HH:MM:SS + re := regexp.MustCompile(`(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2}):(\d{2})`) + matches := re.FindStringSubmatch(text) + + if len(matches) != 7 { + return 0, time.Since(startTime).Milliseconds(), fmt.Errorf("could not parse timestamp from: '%s'", text) + } + + // Parse components + year, _ := strconv.Atoi(matches[1]) + month, _ := strconv.Atoi(matches[2]) + day, _ := strconv.Atoi(matches[3]) + hour, _ := 
strconv.Atoi(matches[4]) + minute, _ := strconv.Atoi(matches[5]) + second, _ := strconv.Atoi(matches[6]) + + // Create timestamp in local timezone + cameraTime := time.Date(year, time.Month(month), day, hour, minute, second, 0, time.Local) + serverTime := time.Now() + + // Calculate difference + diff := int(serverTime.Sub(cameraTime).Seconds()) + + ocrDuration := time.Since(startTime).Milliseconds() + + return diff, ocrDuration, nil +} diff --git a/backups/backup_20251127/training_digits/0_1.png b/backups/backup_20251127/training_digits/0_1.png new file mode 100644 index 0000000..c66f01f Binary files /dev/null and b/backups/backup_20251127/training_digits/0_1.png differ diff --git a/backups/backup_20251127/training_digits/1_1.png b/backups/backup_20251127/training_digits/1_1.png new file mode 100644 index 0000000..8c86c66 Binary files /dev/null and b/backups/backup_20251127/training_digits/1_1.png differ diff --git a/backups/backup_20251127/training_digits/2_1.png b/backups/backup_20251127/training_digits/2_1.png new file mode 100644 index 0000000..051c23a Binary files /dev/null and b/backups/backup_20251127/training_digits/2_1.png differ diff --git a/backups/backup_20251127/training_digits/2_2.png b/backups/backup_20251127/training_digits/2_2.png new file mode 100644 index 0000000..6be0ed0 Binary files /dev/null and b/backups/backup_20251127/training_digits/2_2.png differ diff --git a/backups/backup_20251127/training_digits/3_1.png b/backups/backup_20251127/training_digits/3_1.png new file mode 100644 index 0000000..a553325 Binary files /dev/null and b/backups/backup_20251127/training_digits/3_1.png differ diff --git a/backups/backup_20251127/training_digits/5_1.png b/backups/backup_20251127/training_digits/5_1.png new file mode 100644 index 0000000..e928dc2 Binary files /dev/null and b/backups/backup_20251127/training_digits/5_1.png differ diff --git a/backups/backup_20251127/training_digits/7_1.png b/backups/backup_20251127/training_digits/7_1.png new file mode 
package main

import (
	"gocv.io/x/gocv"
)

// Global DEBUG flag for detection.go (set by pulseox-monitor.go)
var DEBUG = false

// ProcessingStatus represents the outcome of processing a frame.
type ProcessingStatus int

const (
	StatusSuccess       ProcessingStatus = iota // OCR succeeded; reading is usable
	StatusCorrupted                             // at least one digit came back invalid (-1)
	StatusLowConfidence                         // template-match confidence below threshold
	StatusLayoutInvalid                         // detected screen layout failed validation
	StatusNoChange                              // reading identical to previous; nothing to post
	StatusUnstable                              // large delta held back pending confirmation
)

// Reading holds SpO2 and HR values with per-digit OCR confidence scores.
// A digit value of -1 marks that digit as corrupted (see IsCorrupted).
type Reading struct {
	SpO2           int     // combined SpO2 value (percent)
	SpO2LeftDigit  int     // tens digit of SpO2 (-1 = invalid)
	SpO2LeftConf   float64 // match confidence for that digit (0-100)
	SpO2RightDigit int     // ones digit of SpO2 (-1 = invalid)
	SpO2RightConf  float64
	HR             int // combined heart-rate value (bpm)
	HRLeftDigit    int // tens digit of HR (-1 = invalid)
	HRLeftConf     float64
	HRRightDigit   int // ones digit of HR (-1 = invalid)
	HRRightConf    float64
	Timestamp      string // human-readable capture time
	FrameNum       int    // frame counter at capture
}

// AvgConfidence returns the mean of the two per-digit confidences for SpO2
// and for HR, as two separate averages.
func (r *Reading) AvgConfidence() (spo2Avg, hrAvg float64) {
	spo2Avg = (r.SpO2LeftConf + r.SpO2RightConf) / 2.0
	hrAvg = (r.HRLeftConf + r.HRRightConf) / 2.0
	return
}
marked as invalid (-1) +func (r *Reading) IsCorrupted() bool { + return r.SpO2LeftDigit == -1 || r.SpO2RightDigit == -1 || + r.HRLeftDigit == -1 || r.HRRightDigit == -1 +} + +// GetCorruptionDetails returns a description of which digit(s) are corrupted +func (r *Reading) GetCorruptionDetails() string { + corrupted := []string{} + if r.SpO2LeftDigit == -1 { + corrupted = append(corrupted, "SpO2 digit 2") + } + if r.SpO2RightDigit == -1 { + corrupted = append(corrupted, "SpO2 digit 3") + } + if r.HRLeftDigit == -1 { + corrupted = append(corrupted, "HR digit 2") + } + if r.HRRightDigit == -1 { + corrupted = append(corrupted, "HR digit 3") + } + if len(corrupted) == 0 { + return "unknown" + } + result := "" + for i, c := range corrupted { + if i > 0 { + result += ", " + } + result += c + } + return result +} + +// ProcessingResult contains the outcome of processing a single frame +type ProcessingResult struct { + Status ProcessingStatus + Reading Reading + ShouldPost bool + UnstableReason string +} + +// ProcessingState tracks all state across frame processing +type ProcessingState struct { + // Detection state (sentinels: rotation=9999.0, scaleFactor=0.0) + LockedRotation float64 // Rotation angle in degrees (9999 = not detected) + LockedScale float64 // Scale factor (0 = not detected) + Layout *ScreenLayout // SpO2 and HR display areas + LayoutValid bool // True when detection has succeeded + + // Change detection + LastPosted Reading + + // Stability tracking + LastReading Reading + HasLastReading bool + PendingReading *Reading + BaselineReading Reading + + // Retry state + InRetry bool + RetryCount int + MaxRetries int + ConsecutiveFailures int // Track consecutive layout/processing failures + + // Statistics + FrameCount int + ProcessedCount int + SuccessCount int + FailureCount int + LowConfidenceCount int + TimestampCheckCounter int // Check timestamp every 10 processed frames + + // Review entries + ReviewEntries []ReviewEntry + + // Timing tracking + 
TimingFrameCount int // Count frames for periodic header +} + +// NewProcessingState creates initial state +func NewProcessingState() *ProcessingState { + return &ProcessingState{ + LockedRotation: 9999.0, // Sentinel: not detected + LockedScale: 0.0, // Sentinel: not detected + LayoutValid: false, + MaxRetries: 1, + LastPosted: Reading{SpO2: -1, HR: -1}, + LastReading: Reading{SpO2: -1, HR: -1}, + BaselineReading: Reading{SpO2: -1, HR: -1}, + } +} + +// ResetDetection resets detection state to trigger re-detection +func (s *ProcessingState) ResetDetection() { + s.LockedRotation = 9999.0 + s.LockedScale = 0.0 + s.LayoutValid = false + s.Layout = nil +} + +// NeedsDetection returns true if detection needs to run +func (s *ProcessingState) NeedsDetection() bool { + return s.LockedRotation == 9999.0 || s.LockedScale == 0.0 || !s.LayoutValid +} + +// Action represents what to do next +type Action int + +const ( + ActionContinue Action = iota // Continue to next frame + ActionRetry // Retry with next frame + layout re-detection + ActionPost // Post reading to Home Assistant + ActionHold // Hold reading for validation + ActionDiscard // Discard reading +) + +// FrameData represents a preprocessed frame ready for processing +type FrameData struct { + Original gocv.Mat + Normalized gocv.Mat + FrameNum int + Timestamp string +} + +// TimingData holds timing measurements for a single frame +type TimingData struct { + FrameNum int + Acquire int64 // Frame acquisition (ms) + Threshold int64 // Grayscale + threshold (ms) + Preprocess int64 // Crop + rotate (ms) + Scale int64 // Frame scaling (ms) + OCR_SpO2 int64 // SpO2 recognition (ms) + OCR_HR int64 // HR recognition (ms) + Validation int64 // Validation checks (ms) + FileIO int64 // Saving review images (ms) + HASS int64 // Home Assistant POST (ms) + Total int64 // Total processing time (ms) +} diff --git a/backups/backup_20251127/validators.go b/backups/backup_20251127/validators.go new file mode 100644 index 
package main

import (
	"fmt"
	"io"
	"time"
)

// Validator functions handle the three-tier exception system

// validateCorruption checks for corrupted digits (Handler #1).
// NOTE(review): both branches return ActionContinue, so callers cannot
// distinguish a corrupted frame from a clean one via the return value — only
// the log entry differs. The `logger` parameter is also unused (logMessage is
// used instead). Confirm this is intentional before relying on it.
func validateCorruption(reading *Reading, logger io.Writer) (Action, string) {
	if reading.IsCorrupted() {
		logMessage(LogFile, Info, "  [CORRUPTION] Invalid digit detected: SpO2(%d,%d) HR(%d,%d) - skipping frame",
			reading.SpO2LeftDigit, reading.SpO2RightDigit,
			reading.HRLeftDigit, reading.HRRightDigit)
		return ActionContinue, ""
	}
	return ActionContinue, ""
}

// validateConfidence checks OCR confidence (Handler #2).
// First low-confidence result triggers one retry (with layout re-detection);
// a second consecutive low-confidence result discards the reading.
// The 85% threshold applies to the SpO2 and HR digit-pair averages separately.
func validateConfidence(reading *Reading, state *ProcessingState, logger io.Writer) (Action, string) {
	spo2Avg, hrAvg := reading.AvgConfidence()

	// High confidence - pass through
	if spo2Avg >= 85 && hrAvg >= 85 {
		state.InRetry = false // Clear retry flag
		return ActionContinue, ""
	}

	// Low confidence
	if !state.InRetry {
		// First low confidence - trigger retry
		state.InRetry = true
		logMessage(LogFile, Info, "  [LOW CONFIDENCE] SpO2: %.1f%%, HR: %.1f%% - retrying with layout re-detection",
			spo2Avg, hrAvg)
		return ActionRetry, ""
	}

	// Second consecutive low confidence - give up
	state.InRetry = false
	logMessage(LogFile, Info, "  [LOW CONFIDENCE] Still low after retry (SpO2: %.1f%%, HR: %.1f%%) - pausing",
		spo2Avg, hrAvg)
	return ActionDiscard, ""
}

// validateStability checks for large deltas with hindsight validation (Handler #3).
//
// Strategy: a reading whose SpO2 or HR jumps by more than 3 from the last
// accepted reading is not posted immediately; it is held as "pending". The
// next reading then decides: if it continues in the same direction the jump
// was a real trend (post), if it snaps back toward the baseline the jump was
// a glitch (discard pending).
func validateStability(reading *Reading, state *ProcessingState, logger io.Writer) (Action, string) {
	// First reading - just accept it
	if !state.HasLastReading {
		state.LastReading = *reading
		state.HasLastReading = true
		state.BaselineReading = *reading
		return ActionPost, ""
	}

	// Calculate deltas from last reading
	spo2Delta := abs(reading.SpO2 - state.LastReading.SpO2)
	hrDelta := abs(reading.HR - state.LastReading.HR)

	// Stable (Ī” ≤ 3) - check if we have pending
	if spo2Delta <= 3 && hrDelta <= 3 {
		if state.PendingReading != nil {
			// Check if current reading is closer to baseline or to pending
			spo2ToBaseline := abs(reading.SpO2 - state.BaselineReading.SpO2)
			hrToBaseline := abs(reading.HR - state.BaselineReading.HR)
			spo2ToPending := abs(reading.SpO2 - state.PendingReading.SpO2)
			hrToPending := abs(reading.HR - state.PendingReading.HR)

			if spo2ToBaseline <= spo2ToPending && hrToBaseline <= hrToPending {
				// Closer to baseline - pending was a glitch
				logMessage(LogFile, Info, "  [STABILITY] Discarding glitch: baseline(%d,%d) -> pending(%d,%d) -> current(%d,%d)",
					state.BaselineReading.SpO2, state.BaselineReading.HR,
					state.PendingReading.SpO2, state.PendingReading.HR,
					reading.SpO2, reading.HR)
				state.PendingReading = nil
			} else {
				// Closer to pending - real trend, post pending first
				logMessage(LogFile, Info, "  [STABILITY] Confirming trend: baseline(%d,%d) -> pending(%d,%d) -> current(%d,%d)",
					state.BaselineReading.SpO2, state.BaselineReading.HR,
					state.PendingReading.SpO2, state.PendingReading.HR,
					reading.SpO2, reading.HR)
				// Will be handled by caller to post pending first
				// NOTE(review): the pending reading is cleared here before the
				// caller can see it — verify the caller actually posts it, or
				// the confirmed trend's first point is silently lost.
				state.PendingReading = nil
			}
		}

		// Accept current reading
		state.LastReading = *reading
		state.BaselineReading = *reading
		return ActionPost, ""
	}

	// Large delta (Ī” > 3) - check if we have pending
	if state.PendingReading != nil {
		// Check direction vs pending
		spo2Direction := reading.SpO2 - state.PendingReading.SpO2
		hrDirection := reading.HR - state.PendingReading.HR
		pendingSpo2Diff := state.PendingReading.SpO2 - state.LastReading.SpO2
		pendingHrDiff := state.PendingReading.HR - state.LastReading.HR

		// Same direction if signs match
		spo2SameDir := (spo2Direction > 0 && pendingSpo2Diff > 0) || (spo2Direction < 0 && pendingSpo2Diff < 0)
		hrSameDir := (hrDirection > 0 && pendingHrDiff > 0) || (hrDirection < 0 && pendingHrDiff < 0)

		if spo2SameDir && hrSameDir {
			// Trend confirmed - post pending, then current becomes new pending
			logMessage(LogFile, Info, "  [STABILITY] Trend confirmed: %d->%d->%d (SpO2), %d->%d->%d (HR)",
				state.LastReading.SpO2, state.PendingReading.SpO2, reading.SpO2,
				state.LastReading.HR, state.PendingReading.HR, reading.HR)
			// Post pending (caller handles), then hold current
			oldPending := state.PendingReading
			state.PendingReading = reading
			state.LastReading = *reading
			// Return special signal that pending should be posted
			// NOTE(review): oldPending is computed then discarded — the caller
			// never receives it through this return. Confirm the caller has
			// another way to post the confirmed pending reading.
			_ = oldPending // Will be handled by caller
			return ActionHold, ""
		} else {
			// Opposite directions - discard pending as glitch
			logMessage(LogFile, Info, "  [STABILITY] Direction mismatch: %d->%d->%d (SpO2), %d->%d->%d (HR)",
				state.LastReading.SpO2, state.PendingReading.SpO2, reading.SpO2,
				state.LastReading.HR, state.PendingReading.HR, reading.HR)
			state.PendingReading = nil
			state.LastReading = *reading
			return ActionPost, ""
		}
	}

	// No pending yet - hold this unstable reading
	logMessage(LogFile, Info, "  [STABILITY] Holding unstable reading: SpO2 Ī”%d, HR Ī”%d", spo2Delta, hrDelta)
	state.BaselineReading = state.LastReading
	state.PendingReading = reading
	state.LastReading = *reading
	reason := fmt.Sprintf("[%s] Unstable (SpO2 Ī”%d, HR Ī”%d)", time.Now().Format("15:04:05.000"), spo2Delta, hrDelta)
	return ActionHold, reason
}

// abs returns the absolute value of an int.
func abs(x int) int {
	if x < 0 {
		return -x
	}
	return x
}
#!/bin/bash
# Build script for pulseox-monitor v3.60

echo "=== Building PulseOx Monitor v3.60 (Unified Detection) ==="
echo ""

# List of all source files needed
FILES="pulseox-monitor.go types.go helpers.go frame_source.go processor.go validators.go ocr.go layout_detection.go detection.go config.go homeassistant.go html_report.go timestamp_ocr.go review_server.go ai_fallback.go"

# Check if all files exist before invoking the compiler, so a missing file
# produces one clear message instead of a compiler error.
MISSING=0
for file in $FILES; do
	if [ ! -f "$file" ]; then
		echo "āŒ Missing file: $file"
		MISSING=1
	fi
done

if [ $MISSING -eq 1 ]; then
	echo ""
	echo "Error: Missing source files. Please ensure all files are present."
	exit 1
fi

echo "āœ“ All source files present"
echo ""
echo "Compiling..."

# Build only the listed files (the directory also contains //go:build ignore
# test programs that must not be compiled into the monitor binary).
go build -o pulseox-monitor $FILES

if [ $? -eq 0 ]; then
	echo ""
	echo "āœ“ Build successful: pulseox-monitor"
	echo ""
	echo "Test with:"
	echo "  ./pulseox-monitor raw_frames/raw_20251028-00123.png  # Single frame"
	echo "  ./pulseox-monitor                                    # Live stream"
	echo ""
else
	echo ""
	echo "āŒ Build failed"
	exit 1
fi
package main

import (
	"fmt"
	"os"

	"gopkg.in/yaml.v3"
)

// Config mirrors config.yaml: camera source, Home Assistant target, frame
// processing tunables, and logging destination.
type Config struct {
	Camera struct {
		RTSPURL string `yaml:"rtsp_url"` // full RTSP URL including credentials
	} `yaml:"camera"`
	HomeAssistant struct {
		URL   string `yaml:"url"`   // HASS base URL
		Token string `yaml:"token"` // long-lived access token
	} `yaml:"home_assistant"`
	Processing struct {
		SampleInterval         int `yaml:"sample_interval"`          // seconds between processed frames
		ChangeThresholdPercent int `yaml:"change_threshold_percent"` // reject readings changing more than this
		MaxDriftSeconds        int `yaml:"max_drift_seconds"`        // max allowed camera-clock drift increase
	} `yaml:"processing"`
	Logging struct {
		Level string `yaml:"level"` // debug, info, warn, error
		File  string `yaml:"file"`  // log file path
	} `yaml:"logging"`
}

// LoadConfig reads and parses the YAML config file at filename.
// Both the read and the parse error are wrapped with context for the caller.
func LoadConfig(filename string) (*Config, error) {
	data, err := os.ReadFile(filename)
	if err != nil {
		return nil, fmt.Errorf("failed to read config file: %w", err)
	}

	var config Config
	if err := yaml.Unmarshal(data, &config); err != nil {
		return nil, fmt.Errorf("failed to parse config file: %w", err)
	}

	return &config, nil
}
exist on this camera + rtsp_url: "rtsp://tapohass:!!Helder06@192.168.2.183:554/stream2" + +home_assistant: + # TS140 server IP where HASS is hosted + url: "http://192.168.1.252:8123" + # Long-lived access token for pulse-monitor + token: "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiIzOTMxMTA4MjczYzI0NDU1YjIzOGJlZWE0Y2NkM2I1OCIsImlhdCI6MTc2MTExNTQxNywiZXhwIjoyMDc2NDc1NDE3fQ.URFS4M0rX78rW27gQuBX-PyrPYMLlGujF16jIBHXYOw" + +processing: + # How often to capture and process frames (seconds) + sample_interval: 1 + # Reject readings that change more than this percent + change_threshold_percent: 5 + # Maximum allowed drift INCREASE in seconds (detects camera lag/freeze) + # Camera can be 10s behind server consistently - that's fine + # But if drift increases by more than this, frame is stale + max_drift_seconds: 3 + +logging: + level: "info" # debug, info, warn, error + file: "./logs/pulse-monitor.log" \ No newline at end of file diff --git a/create_backup.sh b/create_backup.sh new file mode 100755 index 0000000..46b1f41 --- /dev/null +++ b/create_backup.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# Backup script for pulse-monitor before refactoring +# Created: 2025-10-30 + +BACKUP_DIR="backups/backup_20251030_043342" +mkdir -p "$BACKUP_DIR" + +# Copy all source files +cp pulse-monitor.go "$BACKUP_DIR/" +cp ocr.go "$BACKUP_DIR/" +cp layout_detection.go "$BACKUP_DIR/" +cp normalize.go "$BACKUP_DIR/" +cp config.go "$BACKUP_DIR/" +cp homeassistant.go "$BACKUP_DIR/" +cp html_report.go "$BACKUP_DIR/" +cp PROJECT_STATE.md "$BACKUP_DIR/" +cp config.yaml "$BACKUP_DIR/" + +echo "āœ“ Backup completed to $BACKUP_DIR" +ls -lh "$BACKUP_DIR" diff --git a/detection.go b/detection.go new file mode 100644 index 0000000..91cf11b --- /dev/null +++ b/detection.go @@ -0,0 +1,793 @@ +package main + +import ( + "fmt" + "image" + "image/color" + "math" + "sort" + + "gocv.io/x/gocv" +) + +// DEBUG flag is declared in types.go + +// Band represents a vertical band of contours +type Band struct { + minX int 
// Band represents a vertical band of contours (a horizontal slice of the
// display that contains related content, e.g. the waveform or the digits).
type Band struct {
	minX int
	maxX int
	minY int
	maxY int
}

// DetectBands finds and groups contours into vertical bands.
// Only returns bands with height >= minHeight pixels.
//
// Pipeline: morphological opening with a wide horizontal kernel (removes
// thin vertical UI borders that would glue rows together) → external
// contours → filter out tall-thin border contours → sort by top edge →
// merge vertically overlapping/adjacent boxes into bands.
func DetectBands(binary gocv.Mat, minHeight int) []Band {
	type Box struct {
		minX int
		maxX int
		minY int
		maxY int
	}

	// Remove thin vertical lines (UI borders) using morphological opening with horizontal kernel
	// This breaks the connection between rows caused by vertical UI elements
	kernel := gocv.GetStructuringElement(gocv.MorphRect, image.Pt(15, 1)) // Wide horizontal kernel
	cleaned := gocv.NewMat()
	gocv.MorphologyEx(binary, &cleaned, gocv.MorphOpen, kernel)
	kernel.Close()
	defer cleaned.Close()

	// Find contours on the cleaned image
	contours := gocv.FindContours(cleaned, gocv.RetrievalExternal, gocv.ChainApproxSimple)

	// Get all bounding boxes, filtering out tall thin UI elements (borders)
	boxes := []Box{}
	frameHeight := binary.Rows()
	for i := 0; i < contours.Size(); i++ {
		rect := gocv.BoundingRect(contours.At(i))
		width := rect.Max.X - rect.Min.X
		height := rect.Max.Y - rect.Min.Y

		// Skip tall thin contours (aspect ratio > 8) that span most of frame height
		// These are likely UI borders, not content
		if width > 0 && height > frameHeight*6/10 {
			aspectRatio := float64(height) / float64(width)
			if aspectRatio > 8 {
				if DEBUG {
					fmt.Printf("DEBUG: Skipping border contour: %dx%d (aspect=%.1f)\n", width, height, aspectRatio)
				}
				continue
			}
		}

		boxes = append(boxes, Box{
			minX: rect.Min.X,
			maxX: rect.Max.X,
			minY: rect.Min.Y,
			maxY: rect.Max.Y,
		})
	}

	// Sort boxes by minY so the merge pass below only needs to look at the
	// previous band to decide overlap/adjacency.
	sort.Slice(boxes, func(i, j int) bool {
		return boxes[i].minY < boxes[j].minY
	})

	// Group overlapping/adjacent boxes into vertical bands
	bands := []Band{}

	for _, box := range boxes {
		// Skip boxes shorter than minHeight
		height := box.maxY - box.minY
		if height < minHeight {
			continue
		}

		if len(bands) == 0 {
			// First band
			bands = append(bands, Band{
				minX: box.minX,
				maxX: box.maxX,
				minY: box.minY,
				maxY: box.maxY,
			})
		} else {
			lastBand := &bands[len(bands)-1]

			// Check if this box overlaps or is close to last band (within 5px)
			if box.minY <= lastBand.maxY+5 {
				// Merge: extend the band in both X and Y
				if box.minX < lastBand.minX {
					lastBand.minX = box.minX
				}
				if box.maxX > lastBand.maxX {
					lastBand.maxX = box.maxX
				}
				if box.maxY > lastBand.maxY {
					lastBand.maxY = box.maxY
				}
			} else {
				// New band
				bands = append(bands, Band{
					minX: box.minX,
					maxX: box.maxX,
					minY: box.minY,
					maxY: box.maxY,
				})
			}
		}
	}

	return bands
}
Section{ + minX: rect.Min.X, + maxX: rect.Max.X, + minY: rect.Min.Y + graphBand.minY, // Adjust to full image coordinates + maxY: rect.Max.Y + graphBand.minY, + width: width, + height: height, + }) + } + } + + // DEBUG: Visualize ALL contours >= 50px high + if DEBUG { + viz := gocv.NewMat() + gocv.CvtColor(binary, &viz, gocv.ColorGrayToBGR) + + // Draw graph band boundaries + gocv.Line(&viz, image.Pt(0, graphBand.minY), image.Pt(viz.Cols(), graphBand.minY), + color.RGBA{255, 0, 255, 255}, 2) + gocv.Line(&viz, image.Pt(0, graphBand.maxY), image.Pt(viz.Cols(), graphBand.maxY), + color.RGBA{255, 0, 255, 255}, 2) + + // Draw ALL contours >= 50px high in yellow + for i, c := range allContours { + rect := image.Rect(c.minX, c.minY, c.maxX, c.maxY) + gocv.Rectangle(&viz, rect, color.RGBA{0, 255, 255, 255}, 2) + label := fmt.Sprintf("#%d: %dx%d", i+1, c.width, c.height) + gocv.PutText(&viz, label, image.Pt(c.minX, c.minY-5), + gocv.FontHersheyPlain, 1.0, color.RGBA{0, 255, 255, 255}, 1) + } + + // Highlight sections that passed the 80px height filter in green + for i, s := range sections { + rect := image.Rect(s.minX, s.minY, s.maxX, s.maxY) + gocv.Rectangle(&viz, rect, color.RGBA{0, 255, 0, 255}, 3) + label := fmt.Sprintf("BASELINE#%d", i+1) + gocv.PutText(&viz, label, image.Pt(s.minX, s.maxY+20), + gocv.FontHersheyPlain, 1.2, color.RGBA{0, 255, 0, 255}, 2) + } + + // Add summary text + summary := fmt.Sprintf("Contours H>=50px: %d | Baseline sections: %d", len(allContours), len(sections)) + gocv.PutText(&viz, summary, image.Pt(10, 30), + gocv.FontHersheyDuplex, 1.0, color.RGBA{255, 255, 0, 255}, 2) + + gocv.IMWrite("test_output/debug_all_contours.png", viz) + viz.Close() + fmt.Println("šŸ’¾ Saved all contours visualization to test_output/debug_all_contours.png") + } + + // Check if we have exactly 2 sections + if len(sections) != 2 { + return nil, nil, 0, len(sections), fmt.Errorf("found %d sections (need exactly 2)", len(sections)) + } + + // Sort sections by X position 
(left to right) + sort.Slice(sections, func(i, j int) bool { + return sections[i].minX < sections[j].minX + }) + + // Horizontal width (left edge of left section to right edge of right section) + horizontalWidth := sections[1].maxX - sections[0].minX + + // Fixed offset from edges for sampling points + // Needs to be large enough to avoid curved edges due to tilt + offset := 10 + + // Find rotation points + // Left section: lowest white pixel offset from left edge + leftSection := sections[0] + leftX := leftSection.minX + offset + leftPoint := findLowestWhitePixel(binary, leftX, graphBand.minY, graphBand.maxY) + + // Right section: lowest white pixel offset from right edge + rightSection := sections[1] + rightX := rightSection.maxX - offset + rightPoint := findLowestWhitePixel(binary, rightX, graphBand.minY, graphBand.maxY) + + if leftPoint == nil || rightPoint == nil { + return nil, nil, 0, len(sections), fmt.Errorf("could not find rotation endpoints") + } + + return leftPoint, rightPoint, horizontalWidth, len(sections), nil +} + +// findLowestWhitePixel finds the lowest (max Y) white pixel at given X within Y range +func findLowestWhitePixel(binary gocv.Mat, x int, minY int, maxY int) *image.Point { + lowestY := -1 + for y := minY; y <= maxY; y++ { + if binary.GetUCharAt(y, x) == 255 { + lowestY = y + } + } + if lowestY == -1 { + return nil + } + return &image.Point{X: x, Y: lowestY} +} + +// DetectionResult contains rotation and width detection results +type DetectionResult struct { + Rotation float64 + Width int + ScaleFactor float64 // Scale factor to apply (860 / width) + SpO2 image.Rectangle // SpO2 display area in SCALED, ROTATED coordinates + HR image.Rectangle // HR display area in SCALED, ROTATED coordinates + Success bool +} + +// PreprocessFrame takes a raw frame and prepares it for detection +// Returns the processed binary image +func PreprocessFrame(frame gocv.Mat) gocv.Mat { + // Crop top 68 pixels (timestamp) + cropped := 
frame.Region(image.Rect(0, 68, frame.Cols(), frame.Rows())) + + // Rotate 90° clockwise + rotated := gocv.NewMat() + gocv.Rotate(cropped, &rotated, gocv.Rotate90Clockwise) + + // Convert to grayscale + gray := gocv.NewMat() + gocv.CvtColor(rotated, &gray, gocv.ColorBGRToGray) + rotated.Close() + + // Threshold to binary (matching main pipeline) + binary := gocv.NewMat() + gocv.Threshold(gray, &binary, 180, 255, gocv.ThresholdBinary) + gray.Close() + + return binary +} + +// DetectRotationAndWidth analyzes a binary frame and returns rotation angle, width, and display areas +// Works on its own internal copy - does NOT modify the input frame +// Returns coordinates in TRANSFORMED (scaled to 860px, rotated) coordinate space +func DetectRotationAndWidth(binary gocv.Mat) DetectionResult { + // Clone the input so we don't modify it + workingCopy := binary.Clone() + defer workingCopy.Close() + + // ========== PHASE 1: Detect rotation & scale from baseline ========== + + // Detect bands (using 40px minimum height for original scale - lowered for stream2 which is lower res) + bands := DetectBands(workingCopy, 40) + + // Save debug image showing detected bands (only in DEBUG mode) + if DEBUG { + viz := gocv.NewMat() + gocv.CvtColor(workingCopy, &viz, gocv.ColorGrayToBGR) + colors := []color.RGBA{ + {255, 0, 0, 0}, // Red + {0, 255, 0, 0}, // Green + {0, 0, 255, 0}, // Blue + {255, 255, 0, 0}, // Yellow + {255, 0, 255, 0}, // Magenta + {0, 255, 255, 0}, // Cyan + } + for i, band := range bands { + c := colors[i%len(colors)] + gocv.Rectangle(&viz, image.Rect(band.minX, band.minY, band.maxX, band.maxY), c, 2) + gocv.PutText(&viz, fmt.Sprintf("Band %d: h=%d", i, band.maxY-band.minY), + image.Pt(band.minX, band.minY-5), gocv.FontHersheyPlain, 1.0, c, 1) + } + gocv.PutText(&viz, fmt.Sprintf("Total bands: %d (need >=2)", len(bands)), + image.Pt(10, 30), gocv.FontHersheyPlain, 1.5, color.RGBA{255, 255, 255, 0}, 2) + gocv.IMWrite("test_output/debug_bands.png", viz) + viz.Close() + } 
+ + // Check if we have any content visible + if len(bands) < 1 { + if DEBUG { + fmt.Printf("DEBUG: Detection failed - no bands found\n") + } + return DetectionResult{Success: false} + } + + // For rotation detection, we ideally want a waveform band + // If available, use band 0 as graph band; otherwise skip rotation detection + var graphBand Band + var hasGraphBand bool + if len(bands) >= 1 { + graphBand = bands[0] // Use first band for rotation detection if available + hasGraphBand = true + if DEBUG { + fmt.Printf("DEBUG: Using band 0 as graph band (h=%d)\n", graphBand.maxY-graphBand.minY) + } + } + + // Find baseline for rotation detection (only if we have a usable graph band) + var leftPoint, rightPoint *image.Point + var width, sections int + var err error + + if hasGraphBand { + leftPoint, rightPoint, width, sections, err = findBaseline(workingCopy, graphBand) + } + + // Variables for rotation and scale + var rotation float64 + var scaleFactor float64 + + if !hasGraphBand || err != nil || sections != 2 { + // Baseline detection failed or not available - use fallback (assume no rotation, scale=1) + if DEBUG { + if !hasGraphBand { + fmt.Printf("DEBUG: No graph band - using fallback rotation=0, scale=1\n") + } else if err != nil { + fmt.Printf("DEBUG: Baseline failed (%v) - using fallback\n", err) + } else { + fmt.Printf("DEBUG: Baseline found %d sections - using fallback\n", sections) + } + } + rotation = 0.0 + scaleFactor = 1.0 // No scaling for fixed layout approach + width = workingCopy.Cols() + // Create dummy points for visualization + midY := workingCopy.Rows() / 2 + leftPoint = &image.Point{X: 0, Y: midY} + rightPoint = &image.Point{X: workingCopy.Cols(), Y: midY} + } else { + // Baseline detection succeeded - calculate rotation and scale + deltaY := float64(rightPoint.Y - leftPoint.Y) + deltaX := float64(rightPoint.X - leftPoint.X) + rotation = math.Atan2(deltaY, deltaX) * 180 / math.Pi + scaleFactor = 860.0 / float64(width) + } + + // DEBUG: Visualize 
the baseline points + if DEBUG { + viz := gocv.NewMat() + gocv.CvtColor(workingCopy, &viz, gocv.ColorGrayToBGR) + + // Find the two baseline sections for visualization + graphRegion := workingCopy.Region(image.Rect(0, graphBand.minY, workingCopy.Cols(), graphBand.maxY+1)) + contours := gocv.FindContours(graphRegion, gocv.RetrievalExternal, gocv.ChainApproxSimple) + + type Section struct { + minX int + maxX int + minY int + maxY int + } + sections := []Section{} + + for i := 0; i < contours.Size(); i++ { + rect := gocv.BoundingRect(contours.At(i)) + width := rect.Max.X - rect.Min.X + height := rect.Max.Y - rect.Min.Y + if width >= 5 && height >= 80 { + sections = append(sections, Section{ + minX: rect.Min.X, + maxX: rect.Max.X, + minY: rect.Min.Y + graphBand.minY, + maxY: rect.Max.Y + graphBand.minY, + }) + } + } + + sort.Slice(sections, func(i, j int) bool { + return sections[i].minX < sections[j].minX + }) + + // Draw baseline sections in cyan + for i, sec := range sections { + rect := image.Rect(sec.minX, sec.minY, sec.maxX, sec.maxY) + gocv.Rectangle(&viz, rect, color.RGBA{0, 255, 255, 255}, 2) + label := fmt.Sprintf("Sec%d: %dpx", i+1, sec.maxX-sec.minX) + gocv.PutText(&viz, label, image.Pt(sec.minX, sec.minY-5), + gocv.FontHersheyPlain, 1.0, color.RGBA{0, 255, 255, 255}, 1) + } + + // Draw left point (green circle) + gocv.Circle(&viz, *leftPoint, 10, color.RGBA{0, 255, 0, 255}, -1) + gocv.PutText(&viz, "L", image.Pt(leftPoint.X-5, leftPoint.Y-15), + gocv.FontHersheyPlain, 1.5, color.RGBA{0, 255, 0, 255}, 2) + + // Draw right point (red circle) + gocv.Circle(&viz, *rightPoint, 10, color.RGBA{0, 0, 255, 255}, -1) + gocv.PutText(&viz, "R", image.Pt(rightPoint.X-5, rightPoint.Y-15), + gocv.FontHersheyPlain, 1.5, color.RGBA{0, 0, 255, 255}, 2) + + // Draw line between baseline points (yellow) + gocv.Line(&viz, *leftPoint, *rightPoint, color.RGBA{0, 255, 255, 255}, 3) + + // Draw graph band boundaries (magenta) + gocv.Line(&viz, image.Pt(0, graphBand.minY), 
image.Pt(viz.Cols(), graphBand.minY), color.RGBA{255, 0, 255, 255}, 2) + gocv.Line(&viz, image.Pt(0, graphBand.maxY), image.Pt(viz.Cols(), graphBand.maxY), color.RGBA{255, 0, 255, 255}, 2) + gocv.PutText(&viz, "Graph Band", image.Pt(10, graphBand.minY+20), + gocv.FontHersheyPlain, 1.2, color.RGBA{255, 0, 255, 255}, 1) + + // Add measurements text overlay at top + widthText := fmt.Sprintf("Baseline Width: %dpx", width) + gocv.PutText(&viz, widthText, image.Pt(10, 30), + gocv.FontHersheyDuplex, 1.0, color.RGBA{255, 255, 0, 255}, 2) + + rotText := fmt.Sprintf("Rotation: %.3f deg", rotation) + gocv.PutText(&viz, rotText, image.Pt(10, 60), + gocv.FontHersheyDuplex, 1.0, color.RGBA{255, 255, 0, 255}, 2) + + scaleText := fmt.Sprintf("Scale: %.3f (->860px)", scaleFactor) + gocv.PutText(&viz, scaleText, image.Pt(10, 90), + gocv.FontHersheyDuplex, 1.0, color.RGBA{255, 255, 0, 255}, 2) + + gocv.IMWrite("test_output/debug_baseline_detection.png", viz) + viz.Close() + fmt.Println("šŸ’¾ Saved baseline detection visualization to test_output/debug_baseline_detection.png") + } + + if DEBUG { + fmt.Printf("DEBUG: Baseline points: Left(%d,%d) Right(%d,%d)\n", + leftPoint.X, leftPoint.Y, rightPoint.X, rightPoint.Y) + fmt.Printf("DEBUG: Delta Y=%d, Delta X=%d, Rotation=%.3f°\n", + rightPoint.Y-leftPoint.Y, rightPoint.X-leftPoint.X, rotation) + fmt.Printf("DEBUG: Width=%dpx, Scale factor=%.3f (baseline will become 860px)\n", + width, scaleFactor) + } + + // ========== PHASE 2: Find the main digit band for SpO2/HR ========== + // + // For this pulse-ox display, use FIXED proportions since the layout is consistent + // and dynamic band detection is unreliable due to vertical UI elements + // + // After 90° CW rotation, the frame layout is approximately: + // - Y 0-35%: Status bar, bell icon + // - Y 35-45%: Waveform display + // - Y 45-60%: Main SpO2/HR digits (what we want!) 
+ // - Y 60-75%: Secondary readings + + frameHeight := workingCopy.Rows() + frameWidth := workingCopy.Cols() + + // Use fixed proportions for the digit band + digitBand := Band{ + minX: int(float64(frameWidth) * 0.05), // Slight margin from left + maxX: int(float64(frameWidth) * 0.75), // Avoid the vertical progress bar on right + minY: int(float64(frameHeight) * 0.45), + maxY: int(float64(frameHeight) * 0.58), + } + + if DEBUG { + fmt.Printf("DEBUG: Using fixed digit band: Y[%d-%d], X[%d-%d] (frame %dx%d)\n", + digitBand.minY, digitBand.maxY, digitBand.minX, digitBand.maxX, + frameWidth, frameHeight) + } + + // Split digit band: left half = SpO2, right half = HR + bandWidth := digitBand.maxX - digitBand.minX + midX := digitBand.minX + bandWidth/2 + + // SpO2 is left half, HR is right half + spo2Rect := image.Rect( + digitBand.minX, + digitBand.minY, + midX, + digitBand.maxY, + ) + hrRect := image.Rect( + midX, + digitBand.minY, + digitBand.maxX, + digitBand.maxY, + ) + + if DEBUG { + fmt.Printf("DEBUG: SpO2 display area: X[%d-%d] Y[%d-%d]\n", + spo2Rect.Min.X, spo2Rect.Max.X, spo2Rect.Min.Y, spo2Rect.Max.Y) + fmt.Printf("DEBUG: HR display area: X[%d-%d] Y[%d-%d]\n", + hrRect.Min.X, hrRect.Max.X, hrRect.Min.Y, hrRect.Max.Y) + + // Visualize final display boxes + viz2 := gocv.NewMat() + gocv.CvtColor(workingCopy, &viz2, gocv.ColorGrayToBGR) + + // Draw SpO2 box in red + gocv.Rectangle(&viz2, spo2Rect, color.RGBA{255, 0, 0, 255}, 3) + gocv.PutText(&viz2, "SpO2", image.Pt(spo2Rect.Min.X, spo2Rect.Min.Y-10), + gocv.FontHersheyDuplex, 1.2, color.RGBA{255, 0, 0, 255}, 2) + + // Draw HR box in cyan + gocv.Rectangle(&viz2, hrRect, color.RGBA{0, 255, 255, 255}, 3) + gocv.PutText(&viz2, "HR", image.Pt(hrRect.Min.X, hrRect.Min.Y-10), + gocv.FontHersheyDuplex, 1.2, color.RGBA{0, 255, 255, 255}, 2) + + gocv.IMWrite("test_output/debug_final_boxes.png", viz2) + viz2.Close() + fmt.Println("šŸ’¾ Saved final display boxes visualization to test_output/debug_final_boxes.png") + } + + 
return DetectionResult{ + Rotation: rotation, + Width: width, + ScaleFactor: scaleFactor, + SpO2: spo2Rect, + HR: hrRect, + Success: true, + } +} + +// RotateImage rotates an image by the given angle in degrees +// The angle is the detected rotation that needs to be corrected +func RotateImage(img gocv.Mat, angleDegrees float64) gocv.Mat { + // Get image center + center := image.Point{ + X: img.Cols() / 2, + Y: img.Rows() / 2, + } + + // Apply rotation to correct the tilt + // NOTE: If image rotates wrong direction, change to: -angleDegrees + rotMat := gocv.GetRotationMatrix2D(center, angleDegrees, 1.0) + defer rotMat.Close() + + // Apply rotation + rotated := gocv.NewMat() + gocv.WarpAffine(img, &rotated, rotMat, image.Point{X: img.Cols(), Y: img.Rows()}) + + if DEBUG { + fmt.Printf("DEBUG: Applied rotation: %.3f°\n", angleDegrees) + } + + return rotated +} + +// ExtractBandRegion extracts a specific band region from an image +func ExtractBandRegion(img gocv.Mat, band Band) gocv.Mat { + // Create a region of interest (ROI) for the band + rect := image.Rect(0, band.minY, img.Cols(), band.maxY) + region := img.Region(rect) + + // Clone the region to create an independent Mat + extracted := gocv.NewMat() + region.CopyTo(&extracted) + + if DEBUG { + fmt.Printf("DEBUG: Extracted band region [%d-%d], size: %dx%d\n", + band.minY, band.maxY, extracted.Cols(), extracted.Rows()) + } + + return extracted +} + +// FindDigitBoxes finds individual digit contours within a region +// Returns bounding rectangles for digits larger than minSize +func FindDigitBoxes(digitRegion gocv.Mat, minSize int) []image.Rectangle { + contours := gocv.FindContours(digitRegion, gocv.RetrievalExternal, gocv.ChainApproxSimple) + + var digitBoxes []image.Rectangle + for i := 0; i < contours.Size(); i++ { + rect := gocv.BoundingRect(contours.At(i)) + height := rect.Max.Y - rect.Min.Y + + // Filter by minimum size - height only + if height >= minSize { + digitBoxes = append(digitBoxes, rect) + } + } + 
+ // Sort boxes by X position (left to right) + sort.Slice(digitBoxes, func(i, j int) bool { + return digitBoxes[i].Min.X < digitBoxes[j].Min.X + }) + + if DEBUG { + fmt.Printf("DEBUG: Found %d digit contours (H>=%dpx)\n", len(digitBoxes), minSize) + + // Visualize found boxes + visualization := gocv.NewMat() + defer visualization.Close() + gocv.CvtColor(digitRegion, &visualization, gocv.ColorGrayToBGR) + + for i, box := range digitBoxes { + // Draw box in green + green := color.RGBA{0, 255, 0, 255} + gocv.Rectangle(&visualization, box, green, 2) + + // Show box number and size + label := fmt.Sprintf("#%d: %dx%d", i, box.Dx(), box.Dy()) + gocv.PutText(&visualization, label, image.Pt(box.Min.X, box.Min.Y-5), + gocv.FontHersheyPlain, 0.8, green, 1) + } + + gocv.IMWrite("test_output/debug_digit_boxes.png", visualization) + fmt.Printf("DEBUG: Saved digit boxes to test_output/debug_digit_boxes.png\n") + } + + return digitBoxes +} + +// ScaleByFactor scales an image by a specific scale factor +func ScaleByFactor(img gocv.Mat, scaleFactor float64) gocv.Mat { + // Calculate new dimensions + newWidth := int(float64(img.Cols()) * scaleFactor) + newHeight := int(float64(img.Rows()) * scaleFactor) + + // Resize image + scaled := gocv.NewMat() + gocv.Resize(img, &scaled, image.Point{X: newWidth, Y: newHeight}, 0, 0, gocv.InterpolationLinear) + + if DEBUG { + fmt.Printf("DEBUG: Scaled image from %dx%d to %dx%d (scale factor: %.3f)\n", + img.Cols(), img.Rows(), newWidth, newHeight, scaleFactor) + } + + return scaled +} + +// applyTransforms applies rotation and scaling to a frame +// This is the SINGLE source of truth for transform logic +// Both detection and main loop MUST use this function to ensure identical transforms +func applyTransforms(img gocv.Mat, rotation float64, scaleFactor float64) gocv.Mat { + // Apply rotation (if needed) + var rotated gocv.Mat + if rotation != 0.0 { + rotated = RotateImage(img, rotation) + } else { + rotated = img.Clone() + } + + // Apply 
scaling (if needed) + var scaled gocv.Mat + if scaleFactor != 1.0 { + scaled = ScaleByFactor(rotated, scaleFactor) + if rotation != 0.0 { + rotated.Close() // Only close if we created a new Mat + } + } else { + scaled = rotated + } + + return scaled +} + +// DisplayAreas holds the two display rectangles for SpO2 and HR +type DisplayAreas struct { + SpO2 image.Rectangle + HR image.Rectangle +} + +// MergeDigitBoxesIntoDisplays merges individual digit boxes into two unified display areas +// This is the FINAL step of detection after finding digit boxes +// It splits at the center of the BOUNDING BOX (not image center) +// and merges all left boxes into SpO2, all right boxes into HR display area +func MergeDigitBoxesIntoDisplays(digitRegion gocv.Mat, digitBoxes []image.Rectangle) DisplayAreas { + // First, find the bounding box of ALL digit boxes + contentMinX := digitRegion.Cols() + contentMaxX := 0 + for _, box := range digitBoxes { + if box.Min.X < contentMinX { + contentMinX = box.Min.X + } + if box.Max.X > contentMaxX { + contentMaxX = box.Max.X + } + } + + // Calculate center of the CONTENT bounding box (not image center) + centerX := (contentMinX + contentMaxX) / 2 + + if DEBUG { + fmt.Printf("DEBUG: Content bounding box: X[%d-%d], Center X=%d\n", + contentMinX, contentMaxX, centerX) + } + + // Initialize bounding boxes + spo2MinX, spo2MaxX := digitRegion.Cols(), 0 + spo2MinY, spo2MaxY := digitRegion.Rows(), 0 + hrMinX, hrMaxX := digitRegion.Cols(), 0 + hrMinY, hrMaxY := digitRegion.Rows(), 0 + + // Track counts for logging + leftCount := 0 + rightCount := 0 + + // Split boxes by center X and merge + for _, box := range digitBoxes { + // Calculate box center X to determine which half it's in + boxCenterX := (box.Min.X + box.Max.X) / 2 + + if boxCenterX < centerX { + // LEFT HALF - SpO2 + leftCount++ + if box.Min.X < spo2MinX { + spo2MinX = box.Min.X + } + if box.Max.X > spo2MaxX { + spo2MaxX = box.Max.X + } + if box.Min.Y < spo2MinY { + spo2MinY = box.Min.Y + } 
+ if box.Max.Y > spo2MaxY { + spo2MaxY = box.Max.Y + } + } else { + // RIGHT HALF - HR + rightCount++ + if box.Min.X < hrMinX { + hrMinX = box.Min.X + } + if box.Max.X > hrMaxX { + hrMaxX = box.Max.X + } + if box.Min.Y < hrMinY { + hrMinY = box.Min.Y + } + if box.Max.Y > hrMaxY { + hrMaxY = box.Max.Y + } + } + } + + if DEBUG { + fmt.Printf("DEBUG: Split at center X=%d: %d boxes left (SpO2), %d boxes right (HR)\n", + centerX, leftCount, rightCount) + fmt.Printf("DEBUG: SpO2 merged area: X[%d-%d] Y[%d-%d], Size: %dx%d\n", + spo2MinX, spo2MaxX, spo2MinY, spo2MaxY, + spo2MaxX-spo2MinX, spo2MaxY-spo2MinY) + fmt.Printf("DEBUG: HR merged area: X[%d-%d] Y[%d-%d], Size: %dx%d\n", + hrMinX, hrMaxX, hrMinY, hrMaxY, + hrMaxX-hrMinX, hrMaxY-hrMinY) + } + + return DisplayAreas{ + SpO2: image.Rect(spo2MinX, spo2MinY, spo2MaxX, spo2MaxY), + HR: image.Rect(hrMinX, hrMinY, hrMaxX, hrMaxY), + } +} diff --git a/frame_source.go b/frame_source.go new file mode 100644 index 0000000..9afeb17 --- /dev/null +++ b/frame_source.go @@ -0,0 +1,280 @@ +package main + +import ( + "fmt" + "image" + "net/url" + "sync" + "time" + + "gocv.io/x/gocv" +) + +// FrameSource provides frames to process +type FrameSource interface { + // Next returns the next frame and whether to continue processing + // Returns (frame, shouldContinue, error) + Next() (gocv.Mat, bool, error) + + // NextImmediate returns the very next frame (ignoring skip count) + // Used for immediate retry after low confidence/corruption + NextImmediate() (gocv.Mat, bool, error) + + // Close cleans up resources + Close() error + + // Name returns a description of the source + Name() string + + // IsActive returns false if source has been closed + IsActive() bool +} + +// RTSPSource reads from an RTSP stream +type RTSPSource struct { + url string + stream *gocv.VideoCapture + frameNum int + minFrameInterval time.Duration // Minimum time between processed frames + lastProcessedTime time.Time // Last time we processed a frame + 
lastAcquiredTime time.Time // Last time we acquired a frame (for logging) + closed bool + closeOnce sync.Once +} + +// buildGStreamerPipeline constructs a GStreamer pipeline from an RTSP URL +// Using explicit user-id/user-pw parameters works better than embedding in URL +func buildGStreamerPipeline(rtspURL string) (string, error) { + u, err := url.Parse(rtspURL) + if err != nil { + return "", fmt.Errorf("invalid RTSP URL: %w", err) + } + + // Extract credentials + var userID, userPW string + if u.User != nil { + userID = u.User.Username() + userPW, _ = u.User.Password() + } + + // Build location without credentials + locationURL := fmt.Sprintf("rtsp://%s%s", u.Host, u.Path) + + // Build GStreamer pipeline with explicit credentials + pipeline := fmt.Sprintf( + "rtspsrc location=%s user-id=%s user-pw=%s latency=0 ! decodebin ! videoconvert ! appsink", + locationURL, userID, userPW, + ) + + return pipeline, nil +} + +// NewRTSPSource creates a new RTSP frame source +// Processes frames twice per second +func NewRTSPSource(rtspURL string) (*RTSPSource, error) { + // Build GStreamer pipeline with explicit credentials + pipeline, err := buildGStreamerPipeline(rtspURL) + if err != nil { + return nil, err + } + + logMessage(Console, Info, "šŸ“” Connecting via GStreamer pipeline...") + stream, err := gocv.OpenVideoCapture(pipeline) + if err != nil { + return nil, fmt.Errorf("failed to connect to RTSP stream: %w", err) + } + + logMessage(Console, Info, "šŸ“Š Processing frames at 2 per second") + + return &RTSPSource{ + url: rtspURL, + stream: stream, + minFrameInterval: 500 * time.Millisecond, + lastProcessedTime: time.Time{}, // Zero time = process first frame immediately + }, nil +} + +// readFrame reads and validates the next frame from the stream +func (s *RTSPSource) readFrame() (gocv.Mat, bool, error) { + // Check if source was closed + if s.closed { + return gocv.NewMat(), false, nil + } + + // Check if stream is valid + if s.stream == nil { + return gocv.NewMat(), 
false, fmt.Errorf("stream is null") + } + + frame := gocv.NewMat() + if ok := s.stream.Read(&frame); !ok { + frame.Close() + + // Check again before reconnecting + if s.closed { + return gocv.NewMat(), false, nil + } + + // Try to reconnect + s.stream.Close() + time.Sleep(5 * time.Second) + + // Check again after sleep + if s.closed { + return gocv.NewMat(), false, nil + } + + // Rebuild GStreamer pipeline for reconnection + pipeline, err := buildGStreamerPipeline(s.url) + if err != nil { + s.stream = nil + s.closed = true + return gocv.NewMat(), false, fmt.Errorf("reconnect failed: %w", err) + } + + newStream, err := gocv.OpenVideoCapture(pipeline) + if err != nil { + s.stream = nil // Set to nil to prevent further read attempts + s.closed = true // Mark as closed + return gocv.NewMat(), false, fmt.Errorf("reconnect failed: %w", err) + } + s.stream = newStream + return gocv.NewMat(), true, nil // Signal to retry + } + + s.frameNum++ + + // Check frame validity + if frame.Empty() || frame.Cols() < 640 || frame.Rows() < 480 { + frame.Close() + return gocv.NewMat(), true, nil // Signal to retry + } + + return frame, true, nil +} + +func (s *RTSPSource) Next() (gocv.Mat, bool, error) { + for { + frame, shouldContinue, err := s.readFrame() + if err != nil || !shouldContinue { + return frame, shouldContinue, err + } + + if frame.Empty() { + // readFrame returned empty (reconnecting or invalid), retry + continue + } + + // Check if enough time has passed since last processed frame was ACQUIRED + now := time.Now() + if !s.lastProcessedTime.IsZero() { + elapsed := now.Sub(s.lastProcessedTime) + if elapsed < s.minFrameInterval { + // Not enough time passed, skip this frame + frame.Close() + continue + } + } + + // Enough time passed, mark acquisition time NOW (before processing) + s.lastProcessedTime = now + + // Log frame acquisition interval + if !s.lastAcquiredTime.IsZero() { + interval := now.Sub(s.lastAcquiredTime).Milliseconds() + logMessage(LogFile, Debug, " [TIMING] 
Frame acquired: +%dms since last (target: 500ms)", interval) + } + s.lastAcquiredTime = now + + return frame, true, nil + } +} + +func (s *RTSPSource) NextImmediate() (gocv.Mat, bool, error) { + // Get the very next frame (no skip count) + for { + frame, shouldContinue, err := s.readFrame() + if err != nil || !shouldContinue { + return frame, shouldContinue, err + } + + if frame.Empty() { + // readFrame returned empty (reconnecting or invalid), retry + continue + } + + return frame, true, nil + } +} + +func (s *RTSPSource) Close() error { + s.closeOnce.Do(func() { + s.closed = true + // DON'T close the stream here - it causes segfault if read is in progress + // The stream will be cleaned up when main() exits + }) + return nil +} + +func (s *RTSPSource) IsActive() bool { + return !s.closed +} + +func (s *RTSPSource) Name() string { + return fmt.Sprintf("RTSP stream: %s", s.url) +} + +// FileSource reads a single frame from a file +type FileSource struct { + path string + done bool +} + +// NewFileSource creates a new file frame source +func NewFileSource(path string) *FileSource { + return &FileSource{ + path: path, + done: false, + } +} + +func (s *FileSource) Next() (gocv.Mat, bool, error) { + if s.done { + return gocv.NewMat(), false, nil + } + + frame := gocv.IMRead(s.path, gocv.IMReadColor) + if frame.Empty() { + return gocv.NewMat(), false, fmt.Errorf("failed to load frame: %s", s.path) + } + + s.done = true + return frame, false, nil // Return frame, but signal to stop after this +} + +func (s *FileSource) NextImmediate() (gocv.Mat, bool, error) { + // For file source, NextImmediate is same as Next + return s.Next() +} + +func (s *FileSource) Close() error { + return nil +} + +func (s *FileSource) IsActive() bool { + return !s.done +} + +func (s *FileSource) Name() string { + return fmt.Sprintf("File: %s", s.path) +} + +// preprocessFrame crops timestamp and rotates +func preprocessFrame(frame gocv.Mat) gocv.Mat { + // Crop timestamp (top 68 pixels) + noTs 
:= frame.Region(image.Rect(0, 68, frame.Cols(), frame.Rows())) + rotated := gocv.NewMat() + gocv.Rotate(noTs, &rotated, gocv.Rotate90Clockwise) + noTs.Close() + return rotated +} diff --git a/gemini_ocr.go b/gemini_ocr.go new file mode 100644 index 0000000..bb11476 --- /dev/null +++ b/gemini_ocr.go @@ -0,0 +1,387 @@ +package main + +import ( + "bytes" + "encoding/base64" + "encoding/json" + "fmt" + "image" + "image/png" + "io" + "net/http" + "regexp" + "strconv" + "strings" + "sync" + "time" + + "gocv.io/x/gocv" +) + +// Gemini OCR configuration +var ( + GeminiAPIKey = "AIzaSyAsSUSCVs3SPXL7ugsbXa-chzcOKKJJrbA" + GeminiModel = "gemini-2.0-flash-exp" // Cheapest vision model +) + +// OCR cache using image similarity (not hash!) +type OCRCache struct { + mu sync.RWMutex + entries []OCRCacheEntry +} + +type OCRCacheEntry struct { + Image gocv.Mat // Store the actual image for comparison + Value int + Timestamp time.Time +} + +var ocrCache = &OCRCache{ + entries: []OCRCacheEntry{}, +} + +// ResetCache clears the OCR cache (call at startup) +func ResetOCRCache() { + ocrCache.mu.Lock() + // Close all cached images + for _, entry := range ocrCache.entries { + entry.Image.Close() + } + ocrCache.entries = []OCRCacheEntry{} + ocrCache.mu.Unlock() + logMessage(Console, Info, "šŸ—‘ļø OCR cache cleared") +} + +// findBestCacheMatch finds the cached entry with highest similarity to the input image +// Uses template matching with smaller template to handle position shifts +// Returns the entry and similarity score (0-1), or nil if cache is empty +func findBestCacheMatch(img gocv.Mat) (*OCRCacheEntry, float32) { + ocrCache.mu.RLock() + defer ocrCache.mu.RUnlock() + + if len(ocrCache.entries) == 0 { + return nil, 0 + } + + var bestEntry *OCRCacheEntry + var bestScore float32 = 0 + var bestIdx int = -1 + + for i := range ocrCache.entries { + entry := &ocrCache.entries[i] + if entry.Image.Empty() { + continue + } + + // Create a smaller version of cached image (crop center 60%) + // 
This allows template matching to handle larger position shifts
		marginX := entry.Image.Cols() / 5 // 20% margin each side
		marginY := entry.Image.Rows() / 5
		if marginX < 10 {
			marginX = 10
		}
		if marginY < 10 {
			marginY = 10
		}

		// Crop center portion as template
		templateRect := image.Rect(marginX, marginY,
			entry.Image.Cols()-marginX, entry.Image.Rows()-marginY)
		template := entry.Image.Region(templateRect)

		// Template must be smaller than image for proper matching
		if template.Cols() >= img.Cols() || template.Rows() >= img.Rows() {
			template.Close()
			continue
		}

		// Compare using template matching - find best position.
		// FIX: the no-op mask must be closed too, or a native Mat leaks per call.
		result := gocv.NewMat()
		mask := gocv.NewMat()
		gocv.MatchTemplate(img, template, &result, gocv.TmCcoeffNormed, mask)
		_, maxVal, _, _ := gocv.MinMaxLoc(result)
		mask.Close()
		result.Close()
		template.Close()

		if maxVal > bestScore {
			bestScore = maxVal
			bestEntry = entry
			bestIdx = i
		}
	}

	if DEBUG && bestEntry != nil {
		fmt.Printf("  Best match: cache[%d] value=%d similarity=%.4f\n", bestIdx, bestEntry.Value, bestScore)
	}

	return bestEntry, bestScore
}

// OCR stats for cost tracking.
// NOTE(review): these counters are not mutex-protected; this is safe only if
// OCR runs on a single goroutine - TODO confirm with the caller.
var (
	OCRCallCount   int
	OCRCacheHits   int
	OCRTotalTokens int
)

// Rate limiting: track API call times so we never exceed the provider's
// 10-calls-per-minute quota.
const (
	// maxCallsPerMinute is deliberately below the hard quota of 10 to leave
	// headroom for timing variations on either end of the window.
	maxCallsPerMinute = 8
	rateWindow        = time.Minute
)

var (
	apiCallTimes   []time.Time
	apiCallTimesMu sync.Mutex
)

// waitForRateLimit blocks until another API call may be made without
// exceeding maxCallsPerMinute calls in the trailing minute, then records the
// call. Safe for concurrent use; the mutex is released while sleeping.
func waitForRateLimit() {
	apiCallTimesMu.Lock()
	defer apiCallTimesMu.Unlock()

	for {
		now := time.Now()
		cutoff := now.Add(-rateWindow)

		// Drop calls older than the window; filter in place to reuse the
		// backing array instead of reallocating every pass.
		kept := apiCallTimes[:0]
		for _, t := range apiCallTimes {
			if t.After(cutoff) {
				kept = append(kept, t)
			}
		}
		apiCallTimes = kept

		// Under the limit - proceed.
		if len(apiCallTimes) < maxCallsPerMinute {
			break
		}

		// At the limit - sleep until the oldest recorded call ages out
		// (+1s buffer), then loop back and re-check, because other
		// goroutines may have recorded calls while we slept.
		waitUntil := apiCallTimes[0].Add(rateWindow).Add(time.Second)
		waitDuration := waitUntil.Sub(now)
		if waitDuration <= 0 {
			break
		}

		logMessage(Console, Info, "ā³ Rate limit: waiting %.0fs...", waitDuration.Seconds())
		apiCallTimesMu.Unlock()
		time.Sleep(waitDuration)
		apiCallTimesMu.Lock()
	}

	// Record this call.
	apiCallTimes = append(apiCallTimes, time.Now())
}

// resizeForOCR resizes image to small size for cheap API calls.
// 140x85 is half of 280x170, should be readable for 2-3 digit numbers.
func resizeForOCR(img gocv.Mat) gocv.Mat {
	resized := gocv.NewMat()
	gocv.Resize(img, &resized, image.Pt(140, 85), 0, 0, gocv.InterpolationLinear)
	return resized
}

// matToBase64PNGForGemini converts a gocv.Mat to a base64-encoded PNG.
// (Doc comment previously named a different function.)
func matToBase64PNGForGemini(img gocv.Mat) (string, error) {
	goImg, err := img.ToImage()
	if err != nil {
		return "", fmt.Errorf("failed to convert Mat to image: %w", err)
	}

	var buf bytes.Buffer
	if err := png.Encode(&buf, goImg); err != nil {
		return "", fmt.Errorf("failed to encode PNG: %w", err)
	}

	return base64.StdEncoding.EncodeToString(buf.Bytes()), nil
}

// maxOCRCacheEntries bounds the similarity cache. Once full, the oldest
// entry is evicted and its Mat released. FIX: the cache previously grew
// without bound, leaking native OpenCV memory over long runs.
const maxOCRCacheEntries = 64

// GeminiOCR reads a number from an image using the Gemini Vision API.
// The similarity cache is consulted first; an API call is made only on a
// cache miss. Returns the recognized number, or -1 and an error.
func GeminiOCR(img gocv.Mat, displayName string) (int, error) {
	// Resize for cheaper API calls
	resized := resizeForOCR(img)

	// DEBUG: Save images to see what we're comparing
	if DEBUG {
		filename := fmt.Sprintf("test_output/ocr_%s_%d.png", displayName, time.Now().UnixMilli()%10000)
		gocv.IMWrite(filename, resized)
		logMessage(Console, Debug, "  Saved %s (%dx%d)", filename, resized.Cols(), resized.Rows())
	}

	// Check cache using image similarity.
	// Threshold 0.90: same digit with video noise ~93-94%, different digits ~6-9%
	bestMatch, similarity := findBestCacheMatch(resized)
	if DEBUG {
		logMessage(Console, Debug, "  %s cache: entries=%d, bestSimilarity=%.4f", displayName, len(ocrCache.entries), similarity)
	}
	if bestMatch != nil && similarity >= 0.90 {
		resized.Close()
		OCRCacheHits++
		logMessage(Console, Debug, "  %s: CACHE HIT (%.1f%%) → %d", displayName, similarity*100, bestMatch.Value)
		return bestMatch.Value, nil
	}

	// Log cache miss reason
	if bestMatch != nil {
		logMessage(Console, Debug, "  %s: CACHE MISS (%.1f%% < 90%%, bestMatch=%d) - calling API", displayName, similarity*100, bestMatch.Value)
	} else {
		logMessage(Console, Debug, "  %s: CACHE EMPTY - calling API", displayName)
	}

	// Convert to base64
	b64img, err := matToBase64PNGForGemini(resized)
	if err != nil {
		resized.Close()
		return -1, err
	}

	// Rate limit before API call
	waitForRateLimit()

	// Call Gemini API
	number, err := callGeminiAPI(b64img, displayName)
	if err != nil {
		resized.Close()
		return -1, err
	}

	logMessage(Console, Debug, "  %s: API CALL → %d (cache now has %d entries)", displayName, number, len(ocrCache.entries)+1)

	// Cache the result with the actual image. Do NOT close resized here -
	// ownership transfers to the cache; the eviction path closes it later.
	ocrCache.mu.Lock()
	if len(ocrCache.entries) >= maxOCRCacheEntries {
		ocrCache.entries[0].Image.Close()
		ocrCache.entries = ocrCache.entries[1:]
	}
	ocrCache.entries = append(ocrCache.entries, OCRCacheEntry{
		Image:     resized,
		Value:     number,
		Timestamp: time.Now(),
	})
	ocrCache.mu.Unlock()

	OCRCallCount++
	return number, nil
}

// geminiHTTPClient is shared across calls so TCP/TLS connections are pooled
// instead of allocating a fresh client (and connection) per request.
var geminiHTTPClient = &http.Client{Timeout: 10 * time.Second}

// callGeminiAPI calls Google's Gemini Vision API with a single PNG image and
// a short prompt, and returns the parsed integer reading.
func callGeminiAPI(b64img, displayName string) (int, error) {
	url := fmt.Sprintf("https://generativelanguage.googleapis.com/v1beta/models/%s:generateContent?key=%s",
		GeminiModel, GeminiAPIKey)

	prompt := fmt.Sprintf("This image shows a %s reading from a pulse oximeter. What number is displayed? Reply with ONLY the number (2-3 digits). If unclear, reply FAIL.", displayName)

	payload := map[string]interface{}{
		"contents": []map[string]interface{}{
			{
				"parts": []map[string]interface{}{
					{
						"text": prompt,
					},
					{
						"inline_data": map[string]string{
							"mime_type": "image/png",
							"data":      b64img,
						},
					},
				},
			},
		},
		// temperature 0 + tiny output budget: we want a deterministic,
		// bare number back.
		"generationConfig": map[string]interface{}{
			"maxOutputTokens": 10,
			"temperature":     0,
		},
	}

	jsonData, err := json.Marshal(payload)
	if err != nil {
		return -1, err
	}

	req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData))
	if err != nil {
		return -1, err
	}
	req.Header.Set("Content-Type", "application/json")

	resp, err := geminiHTTPClient.Do(req)
	if err != nil {
		return -1, err
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return -1, err
	}

	if resp.StatusCode != 200 {
		return -1, fmt.Errorf("Gemini API error %d: %s", resp.StatusCode, string(body))
	}

	// Parse response
	var result struct {
		Candidates []struct {
			Content struct {
				Parts []struct {
					Text string `json:"text"`
				} `json:"parts"`
			} `json:"content"`
		} `json:"candidates"`
		UsageMetadata struct {
			PromptTokenCount     int `json:"promptTokenCount"`
			CandidatesTokenCount int `json:"candidatesTokenCount"`
			TotalTokenCount      int `json:"totalTokenCount"`
		} `json:"usageMetadata"`
	}

	if err := json.Unmarshal(body, &result); err != nil {
		return -1, fmt.Errorf("failed to parse response: %w", err)
	}

	if len(result.Candidates) == 0 || len(result.Candidates[0].Content.Parts) == 0 {
		return -1, fmt.Errorf("no response from Gemini")
	}

	// Track token usage
	OCRTotalTokens += result.UsageMetadata.TotalTokenCount

	// Extract number from response
	content := result.Candidates[0].Content.Parts[0].Text
	return extractNumberFromResponse(content)
}

// extractNumberFromResponse extracts a number from LLM response text.
// Returns -1 and an error when the model replied FAIL or no digits appear.
func extractNumberFromResponse(text string) (int, error) {
	text = strings.TrimSpace(text)

	// Check for FAIL response
	if strings.Contains(strings.ToUpper(text), "FAIL") {
		return -1, fmt.Errorf("LLM returned FAIL (unclear image)")
	}

	// Find all numbers in the text
	re := regexp.MustCompile(`\d+`)
	matches := re.FindAllString(text, -1)

	if len(matches) == 0 {
		return -1, fmt.Errorf("no number found in response: %s", text)
	}

	// Take the first number
	num, err := strconv.Atoi(matches[0])
	if err != nil {
		return -1, err
	}

	return num, nil
}

// GetOCRStats returns current OCR statistics: API calls made, cache hits,
// and total tokens consumed.
func GetOCRStats() (calls int, cacheHits int, tokens int) {
	return OCRCallCount, OCRCacheHits, OCRTotalTokens
}

// EstimateCost estimates the API cost based on token usage.
// Gemini Flash: ~$0.075 per 1M input tokens, ~$0.30 per 1M output tokens.
func EstimateCost() float64 {
	// Rough estimate: mostly input tokens (images)
	return float64(OCRTotalTokens) * 0.0000001 // ~$0.10 per 1M tokens average
}
diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..72fd8bd --- /dev/null +++ b/go.mod @@ -0,0 +1,9 @@
module pulse-monitor

go 1.24.4

require (
	github.com/otiai10/gosseract/v2 v2.4.1
	gocv.io/x/gocv v0.31.0
	gopkg.in/yaml.v3 v3.0.1
)
diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..f562cc5 --- /dev/null +++ b/go.sum @@ -0,0 +1,12 @@
github.com/hybridgroup/mjpeg v0.0.0-20140228234708-4680f319790e/go.mod h1:eagM805MRKrioHYuU7iKLUyFPVKqVV6um5DAvCkUtXs=
github.com/otiai10/gosseract/v2 v2.4.1 h1:G8AyBpXEeSlcq8TI85LH/pM5SXk8Djy2GEXisgyblRw=
github.com/otiai10/gosseract/v2 v2.4.1/go.mod h1:1gNWP4Hgr2o7yqWfs6r5bZxAatjOIdqWxJLWsTsembk=
github.com/otiai10/mint v1.6.3 h1:87qsV/aw1F5as1eH1zS/yqHY85ANKVMgkDrf9rcxbQs=
github.com/otiai10/mint v1.6.3/go.mod h1:MJm72SBthJjz8qhefc4z1PYEieWmy8Bku7CjcAqyUSM=
github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
gocv.io/x/gocv v0.31.0
h1:BHDtK8v+YPvoSPQTTiZB2fM/7BLg6511JqkruY2z6LQ= +gocv.io/x/gocv v0.31.0/go.mod h1:oc6FvfYqfBp99p+yOEzs9tbYF9gOrAQSeL/dyIPefJU= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/helpers.go b/helpers.go new file mode 100644 index 0000000..4a0920e --- /dev/null +++ b/helpers.go @@ -0,0 +1,117 @@ +package main + +import ( + "fmt" + "io" + "time" +) + +// LogTarget specifies where to write the log message +type LogTarget int +const ( + Console LogTarget = iota + LogFile + Both +) + +// LogLevel specifies the severity/type of log message +type LogLevel int +const ( + Debug LogLevel = iota + Info + Warning + Error +) + +// Global logger - set once during initialization +var globalLogger io.Writer = nil + +// logMessage writes a formatted log message to the specified target(s) +// Debug messages are automatically filtered out when DEBUG_MODE is false +// +// Usage examples: +// logMessage(Console, Info, "SpO2=%d%%, HR=%d bpm", spo2, hr) +// logMessage(Both, Debug, "Before preprocess: %dx%d", cols, rows) +// logMessage(Both, Error, "Layout detection failed: %v", err) +// logMessage(Both, Warning, "Low confidence (#%d)", count) +func logMessage(target LogTarget, level LogLevel, format string, args ...interface{}) { + // Filter debug messages when not in debug mode + if level == Debug && !DEBUG_MODE { + return + } + + // Format the message + message := fmt.Sprintf(format, args...) 
+ + // Single-letter prefix for fixed-width alignment + var levelChar string + switch level { + case Debug: + levelChar = "D" + case Info: + levelChar = "I" + case Warning: + levelChar = "W" + case Error: + levelChar = "E" + } + + // Format with timestamp + timestamp := time.Now().Format("15:04:05.000") + formattedMessage := fmt.Sprintf("[%s] %s %s\n", timestamp, levelChar, message) + + // Write to target(s) + switch target { + case Console: + fmt.Print(formattedMessage) + case LogFile: + if globalLogger != nil { + fmt.Fprint(globalLogger, formattedMessage) + } + case Both: + fmt.Print(formattedMessage) + if globalLogger != nil { + fmt.Fprint(globalLogger, formattedMessage) + } + } +} + +// logf is DEPRECATED - use logMessage() instead +// Kept temporarily for reference, but no longer used in the codebase +/* +func logf(logger io.Writer, format string, args ...interface{}) { + if logger != nil { + fmt.Fprintf(logger, format, args...) + } +} +*/ + +// printTimingTable prints timing data in horizontal table format +// Prints header every 20 frames for readability +// Note: Acquire is separate (camera waiting time) +// Note: Total is wall-clock processing time (may not equal sum due to logging/overhead) +// Note: HASS is added in main loop after function returns +func printTimingTable(timing TimingData, showHeader bool) { + if !TIMING_MODE { + return + } + + if showHeader { + fmt.Println("") + fmt.Println("Frame | Acquire | Thresh | Prep | Scale | OCR_SpO2 | OCR_HR | Valid | FileIO | HASS | Total") + fmt.Println("------|---------|--------|------|-------|----------|--------|-------|--------|------|-------") + } + + fmt.Printf("#%-4d | %5dms | %5dms | %3dms | %4dms | %7dms | %5dms | %4dms | %5dms | %3dms | %4dms\n", + timing.FrameNum, + timing.Acquire, + timing.Threshold, + timing.Preprocess, + timing.Scale, + timing.OCR_SpO2, + timing.OCR_HR, + timing.Validation, + timing.FileIO, + timing.HASS, + timing.Total) +} diff --git a/homeassistant.go b/homeassistant.go new 
file mode 100644 index 0000000..1acbe1e --- /dev/null +++ b/homeassistant.go @@ -0,0 +1,74 @@ +package main + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "net/http" + "time" +) + +func postToHomeAssistant(config *Config, entityID string, value int, unit string, friendlyName string) error { + url := fmt.Sprintf("%s/api/states/%s", config.HomeAssistant.URL, entityID) + payload := map[string]interface{}{ + "state": fmt.Sprintf("%d", value), + "attributes": map[string]interface{}{ + "unit_of_measurement": unit, + "friendly_name": friendlyName, + "device_class": "measurement", + }, + } + jsonData, err := json.Marshal(payload) + if err != nil { + return fmt.Errorf("marshal error: %w", err) + } + req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData)) + if err != nil { + return fmt.Errorf("request error: %w", err) + } + req.Header.Set("Authorization", "Bearer "+config.HomeAssistant.Token) + req.Header.Set("Content-Type", "application/json") + client := &http.Client{Timeout: 5 * time.Second} + resp, err := client.Do(req) + if err != nil { + return fmt.Errorf("HTTP error: %w", err) + } + defer resp.Body.Close() + if resp.StatusCode != 200 && resp.StatusCode != 201 { + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(body)) + } + return nil +} + +// sendNotification sends a notification via Home Assistant's notify.signal service +func sendNotification(config *Config, title, message string) error { + url := fmt.Sprintf("%s/api/services/notify/signal", config.HomeAssistant.URL) + payload := map[string]interface{}{ + "title": title, + "message": message, + } + jsonData, err := json.Marshal(payload) + if err != nil { + return fmt.Errorf("marshal error: %w", err) + } + req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData)) + if err != nil { + return fmt.Errorf("request error: %w", err) + } + req.Header.Set("Authorization", "Bearer "+config.HomeAssistant.Token) + req.Header.Set("Content-Type", 
"application/json") + client := &http.Client{Timeout: 10 * time.Second} + resp, err := client.Do(req) + if err != nil { + return fmt.Errorf("HTTP error: %w", err) + } + defer resp.Body.Close() + if resp.StatusCode != 200 && resp.StatusCode != 201 { + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(body)) + } + logMessage(LogFile, Info, " šŸ“± Notification sent: %s", title) + return nil +} diff --git a/html_report.go b/html_report.go new file mode 100644 index 0000000..0ab74a6 --- /dev/null +++ b/html_report.go @@ -0,0 +1,260 @@ +package main + +import ( + "fmt" + "os" +) + +type ReviewEntry struct { + FrameNum int + Timestamp string + SpO2Value int + SpO2LeftDigit int // tens digit (for compatibility) + SpO2LeftConf float64 // confidence score + SpO2RightDigit int // ones digit (for compatibility) + SpO2RightConf float64 // same as SpO2LeftConf for whole-number matching + HRValue int + HRLeftDigit int + HRLeftConf float64 + HRRightDigit int + HRRightConf float64 + Failed bool + FailureReason string + Unstable bool + UnstableReason string +} + +// initReviewHTML creates the HTML file with header and opens the table +func initReviewHTML() error { + html := ` + + + + Pulse-Ox Recognition Review + + + + + + +

Pulse-Ox Recognition Review (Live)

+

Tip: Type correct number in input box and press Enter to save as template

+

Refresh page to see latest frames...

+ + + + + + +` + return os.WriteFile("review/review.html", []byte(html), 0644) +} + +// appendReviewEntry appends a single entry to the HTML file +func appendReviewEntry(e ReviewEntry) error { + confClass := func(conf float64) string { + if conf >= 95 { + return "conf-high" + } + return "conf-low" + } + + // Use left conf as the whole-number confidence + spo2Conf := e.SpO2LeftConf + hrConf := e.HRLeftConf + + needsReview := spo2Conf < 95 || hrConf < 95 + reviewMarker := "" + if needsReview { + reviewMarker = " āš ļøCHECK" + } + + var rowHTML string + + if e.Failed { + // Failed recognition entry + rowHTML = fmt.Sprintf(` + + + + +`, e.FrameNum, reviewMarker, e.Timestamp, + e.FailureReason, + e.FrameNum, e.FrameNum, e.FrameNum, + e.SpO2Value, confClass(spo2Conf), spo2Conf, e.FrameNum, + e.FrameNum, e.FrameNum, e.FrameNum, + e.HRValue, confClass(hrConf), hrConf, e.FrameNum) + } else { + // Successful recognition entry + unstableMarker := "" + if e.Unstable { + unstableMarker = fmt.Sprintf("
āš ļø %s", e.UnstableReason) + } + rowHTML = fmt.Sprintf(` + + + + +`, e.FrameNum, reviewMarker, unstableMarker, e.Timestamp, + e.FrameNum, e.FrameNum, e.FrameNum, + e.SpO2Value, confClass(spo2Conf), spo2Conf, e.FrameNum, + e.FrameNum, e.FrameNum, e.FrameNum, + e.HRValue, confClass(hrConf), hrConf, e.FrameNum) + } + + // Open file in append mode + f, err := os.OpenFile("review/review.html", os.O_APPEND|os.O_WRONLY, 0644) + if err != nil { + return err + } + defer f.Close() + + _, err = f.WriteString(rowHTML) + return err +} + +// closeReviewHTML writes the closing tags +func closeReviewHTML() error { + footer := `
FrameTimeSpO2 / HR
%d%s%s + āŒ %s
+
+
+ +
+ %d
+ (%.0f%%)
+ +
+
+
+ +
+ %d
+ (%.0f%%)
+ +
+
+
+
%d%s%s%s +
+
+ +
+ %d
+ (%.0f%%)
+ +
+
+
+ +
+ %d
+ (%.0f%%)
+ +
+
+
+
+

+ Color coding: + Green ≄95%, + Red <95% +

+ +` + + f, err := os.OpenFile("review/review.html", os.O_APPEND|os.O_WRONLY, 0644) + if err != nil { + return err + } + defer f.Close() + + _, err = f.WriteString(footer) + return err +} + +// writeReviewHTML - kept for backward compatibility (final shutdown) +func writeReviewHTML(entries []ReviewEntry) error { + // Just close the HTML properly - entries already appended + return closeReviewHTML() +} diff --git a/layout_detection.go b/layout_detection.go new file mode 100644 index 0000000..a851f1b --- /dev/null +++ b/layout_detection.go @@ -0,0 +1,52 @@ +package main + +import ( + "fmt" + "image" + "image/color" + + "gocv.io/x/gocv" +) + +// ScreenLayout holds the SpO2 and HR display areas +// Used by processor.go for OCR extraction +type ScreenLayout struct { + SpO2Area image.Rectangle + HRArea image.Rectangle +} + +// saveLayoutVisualization draws the detected layout on an image for debugging +func saveLayoutVisualization(img gocv.Mat, layout *ScreenLayout, filename string) { + visualization := gocv.NewMat() + defer visualization.Close() + + // Convert to color if grayscale + if img.Channels() == 1 { + gocv.CvtColor(img, &visualization, gocv.ColorGrayToBGR) + } else { + img.CopyTo(&visualization) + } + + // Draw SpO2 box in red + red := color.RGBA{255, 0, 0, 255} + gocv.Rectangle(&visualization, layout.SpO2Area, red, 3) + gocv.PutText(&visualization, "SpO2", image.Pt(layout.SpO2Area.Min.X, layout.SpO2Area.Min.Y-10), + gocv.FontHersheyDuplex, 1.2, red, 2) + + // Draw HR box in cyan + cyan := color.RGBA{0, 255, 255, 255} + gocv.Rectangle(&visualization, layout.HRArea, cyan, 3) + gocv.PutText(&visualization, "HR", image.Pt(layout.HRArea.Min.X, layout.HRArea.Min.Y-10), + gocv.FontHersheyDuplex, 1.2, cyan, 2) + + // Add dimensions + spo2Text := fmt.Sprintf("%dx%d", layout.SpO2Area.Dx(), layout.SpO2Area.Dy()) + gocv.PutText(&visualization, spo2Text, image.Pt(layout.SpO2Area.Min.X, layout.SpO2Area.Max.Y+20), + gocv.FontHersheyPlain, 1.0, red, 1) + + hrText := 
fmt.Sprintf("%dx%d", layout.HRArea.Dx(), layout.HRArea.Dy()) + gocv.PutText(&visualization, hrText, image.Pt(layout.HRArea.Min.X, layout.HRArea.Max.Y+20), + gocv.FontHersheyPlain, 1.0, cyan, 1) + + gocv.IMWrite(filename, visualization) +} diff --git a/ocr.go b/ocr.go new file mode 100644 index 0000000..e9c4937 --- /dev/null +++ b/ocr.go @@ -0,0 +1,24 @@ +package main + +import ( + "gocv.io/x/gocv" +) + +// imagesMatch compares two images using template matching +// Returns true if they are very similar (>95% match) - not pixel-identical due to video compression +func imagesMatch(img1, img2 gocv.Mat) bool { + if img1.Empty() || img2.Empty() { + return false + } + if img1.Rows() != img2.Rows() || img1.Cols() != img2.Cols() { + return false + } + + result := gocv.NewMat() + gocv.MatchTemplate(img1, img2, &result, gocv.TmCcoeffNormed, gocv.NewMat()) + _, maxVal, _, _ := gocv.MinMaxLoc(result) + result.Close() + + // Video compression means frames are never pixel-identical, use 95% threshold + return maxVal >= 0.95 +} diff --git a/processor.go b/processor.go new file mode 100644 index 0000000..5f417e7 --- /dev/null +++ b/processor.go @@ -0,0 +1,153 @@ +package main + +import ( + "fmt" + "io" + "time" + + "gocv.io/x/gocv" +) + +// Processor handles the frame processing pipeline +type Processor struct { + config *Config + logger io.Writer +} + +// NewProcessor creates a new processor +func NewProcessor(config *Config, logger io.Writer) *Processor { + return &Processor{ + config: config, + logger: logger, + } +} + +// processFrame runs OCR on a normalized frame with valid layout +// Frame is already rotated and scaled by the caller +func (p *Processor) processFrame(frame gocv.Mat, rawFrame gocv.Mat, frameNum int, state *ProcessingState, timing *TimingData) ProcessingResult { + timestamp := time.Now().Format("15:04:05.000") + frameStartTime := time.Now() + + // Extract display regions + spo2Region := frame.Region(state.Layout.SpO2Area) + spo2Img := spo2Region.Clone() + 
spo2Region.Close() + + hrRegion := frame.Region(state.Layout.HRArea) + hrImg := hrRegion.Clone() + hrRegion.Close() + + // Check if images match previous - skip OCR if identical (uses image hash cache) + if state.HasPrevImages { + if imagesMatch(spo2Img, state.PrevSpO2Img) && imagesMatch(hrImg, state.PrevHRImg) { + spo2Img.Close() + hrImg.Close() + timing.Total = time.Since(frameStartTime).Milliseconds() + return ProcessingResult{ + Status: StatusNoChange, + Reading: Reading{ + SpO2: state.PrevSpO2Result, + HR: state.PrevHRResult, + Timestamp: timestamp, + FrameNum: frameNum, + }, + } + } + } + + // Store current images for next comparison + if state.HasPrevImages { + state.PrevSpO2Img.Close() + state.PrevHRImg.Close() + } + state.PrevSpO2Img = spo2Img.Clone() + state.PrevHRImg = hrImg.Clone() + state.HasPrevImages = true + + // Run Gemini OCR on both displays + spo2Start := time.Now() + spo2Val, spo2Err := GeminiOCR(spo2Img, "SpO2") + timing.OCR_SpO2 = time.Since(spo2Start).Milliseconds() + + hrStart := time.Now() + hrVal, hrErr := GeminiOCR(hrImg, "HR") + timing.OCR_HR = time.Since(hrStart).Milliseconds() + + spo2Img.Close() + hrImg.Close() + + // Handle OCR errors + if spo2Err != nil || hrErr != nil { + errMsg := "" + if spo2Err != nil { + errMsg += fmt.Sprintf("SpO2: %v ", spo2Err) + } + if hrErr != nil { + errMsg += fmt.Sprintf("HR: %v", hrErr) + } + logMessage(Both, Warning, " āš ļø OCR error: %s", errMsg) + timing.Total = time.Since(frameStartTime).Milliseconds() + return ProcessingResult{ + Status: StatusLowConfidence, + Reading: Reading{ + SpO2: spo2Val, + HR: hrVal, + Timestamp: timestamp, + FrameNum: frameNum, + }, + } + } + + // Store results for duplicate detection + state.PrevSpO2Result = spo2Val + state.PrevHRResult = hrVal + + reading := Reading{ + SpO2: spo2Val, + HR: hrVal, + Timestamp: timestamp, + FrameNum: frameNum, + } + + // Check for physiologically impossible values + if reading.SpO2 < 50 || reading.SpO2 > 100 || reading.HR < 30 || 
reading.HR > 220 { + logMessage(Both, Warning, " āš ļø Invalid values: SpO2=%d, HR=%d - skipping", reading.SpO2, reading.HR) + timing.Total = time.Since(frameStartTime).Milliseconds() + return ProcessingResult{Status: StatusCorrupted, Reading: reading, ShouldPost: false} + } + + // Check if values changed from last posted + if reading.SpO2 == state.LastPosted.SpO2 && reading.HR == state.LastPosted.HR { + timing.Total = time.Since(frameStartTime).Milliseconds() + return ProcessingResult{Status: StatusNoChange, Reading: reading} + } + + // Values changed - update state and post + state.LastPosted = reading + timing.Total = time.Since(frameStartTime).Milliseconds() + + logMessage(Console, Info, "šŸ“Š SpO2: %d%% HR: %d bpm", reading.SpO2, reading.HR) + return ProcessingResult{Status: StatusSuccess, Reading: reading, ShouldPost: true} +} + +// postReading posts a reading to Home Assistant +func (p *Processor) postReading(reading *Reading, state *ProcessingState) error { + if p.config == nil { + logMessage(LogFile, Info, " ā“˜ Skipping HASS post (test mode)") + return nil + } + + spo2Err := postToHomeAssistant(p.config, "sensor.pulse_ox_spo2", reading.SpO2, "%", "SpO2") + hrErr := postToHomeAssistant(p.config, "sensor.pulse_ox_hr", reading.HR, "bpm", "Heart Rate") + + if spo2Err == nil && hrErr == nil { + state.SuccessCount++ + return nil + } + + state.FailureCount++ + if spo2Err != nil { + return spo2Err + } + return hrErr +} diff --git a/processor.go.backup b/processor.go.backup new file mode 100644 index 0000000..03724da --- /dev/null +++ b/processor.go.backup @@ -0,0 +1,488 @@ +package main + +import ( + "fmt" + "image" + "io" + "time" + + "gocv.io/x/gocv" +) + +// Processor handles the frame processing pipeline +type Processor struct { + templates map[int][]gocv.Mat + config *Config + logger io.Writer +} + +// NewProcessor creates a new processor +func NewProcessor(templates map[int][]gocv.Mat, config *Config, logger io.Writer) *Processor { + return &Processor{ + 
templates: templates, + config: config, + logger: logger, + } +} + +// processFrame runs OCR on a normalized frame with valid layout +// rawFrame is the untouched frame from source (used for saving on errors) +func (p *Processor) processFrame(frame gocv.Mat, rawFrame gocv.Mat, frameNum int, state *ProcessingState, timing *TimingData) ProcessingResult { + timestamp := time.Now().Format("15:04:05.000") + frameStartTime := time.Now() // Measure total processing time + + logMessage(LogFile, Info, "Frame #%d", frameNum) + + // Apply scaling if needed + scaleStart := time.Now() + var normalized gocv.Mat + if state.LockedScale != 1.0 { + newWidth := int(float64(frame.Cols()) * state.LockedScale) + newHeight := int(float64(frame.Rows()) * state.LockedScale) + normalized = gocv.NewMat() + gocv.Resize(frame, &normalized, image.Pt(newWidth, newHeight), 0, 0, gocv.InterpolationLinear) + defer normalized.Close() + logMessage(LogFile, Debug, " Applied scaling: %dx%d -> %dx%d (scale: %.3f)", + frame.Cols(), frame.Rows(), newWidth, newHeight, state.LockedScale) + } else { + normalized = frame + } + timing.Scale = time.Since(scaleStart).Milliseconds() + + // Run OCR + logMessage(LogFile, Info, " Recognizing displays...") + + spo2Start := time.Now() + spo2Val, spo2Left, spo2LeftConf, spo2Right, spo2RightConf := recognizeDisplayArea( + normalized, state.Layout.SpO2Area, p.templates, "SpO2", frameNum, p.logger) + timing.OCR_SpO2 = time.Since(spo2Start).Milliseconds() + + hrStart := time.Now() + hrVal, hrLeft, hrLeftConf, hrRight, hrRightConf := recognizeDisplayArea( + normalized, state.Layout.HRArea, p.templates, "HR", frameNum, p.logger) + timing.OCR_HR = time.Since(hrStart).Milliseconds() + + reading := Reading{ + SpO2: spo2Val, + SpO2LeftDigit: spo2Left, + SpO2LeftConf: spo2LeftConf, + SpO2RightDigit: spo2Right, + SpO2RightConf: spo2RightConf, + HR: hrVal, + HRLeftDigit: hrLeft, + HRLeftConf: hrLeftConf, + HRRightDigit: hrRight, + HRRightConf: hrRightConf, + Timestamp: timestamp, + 
FrameNum: frameNum, + } + + // Check for corruption (Handler #1) - ANY -1 digit means invalid/corrupted + // Silently skip - invalid patterns are working as intended + if reading.IsCorrupted() { + // Validation check is very fast (just checking digit values) + validationStart := time.Now() + // Reading.IsCorrupted() was already called in the if condition + timing.Validation += time.Since(validationStart).Milliseconds() + // Console + Log: descriptive message with frame number + logMessage(LogFile, Warning, " Frame #%d: Corruption detected - %s matched invalid pattern (marked as -1)", + frameNum, reading.GetCorruptionDetails()) + + logMessage(LogFile, Info, " [CORRUPTION] Frame #%d - Digit = -1 detected: SpO2(%d,%d) HR(%d,%d) - skipping frame", + frameNum, + reading.SpO2LeftDigit, reading.SpO2RightDigit, + reading.HRLeftDigit, reading.HRRightDigit) + + // Save debug output to test_output/ with timestamp (only in debug mode) + if DEBUG_MODE { + debugTimestamp := time.Now().Format("20060102_150405") + + // Save normalized frame + normalizedFilename := fmt.Sprintf("test_output/corruption_%s_frame%d_normalized.png", debugTimestamp, frameNum) + gocv.IMWrite(normalizedFilename, normalized) + + // Save layout visualization + layoutFilename := fmt.Sprintf("test_output/corruption_%s_frame%d_layout.jpg", debugTimestamp, frameNum) + saveLayoutVisualization(normalized, state.Layout, layoutFilename) + + logMessage(LogFile, Info, " šŸ’¾ Debug saved: %s", debugTimestamp) + } + + // Add to review HTML so we can see the digit images + entry := ReviewEntry{ + FrameNum: frameNum, + Timestamp: timestamp, + SpO2Value: reading.SpO2, + SpO2LeftDigit: reading.SpO2LeftDigit, + SpO2LeftConf: reading.SpO2LeftConf, + SpO2RightDigit: reading.SpO2RightDigit, + SpO2RightConf: reading.SpO2RightConf, + HRValue: reading.HR, + HRLeftDigit: reading.HRLeftDigit, + HRLeftConf: reading.HRLeftConf, + HRRightDigit: reading.HRRightDigit, + HRRightConf: reading.HRRightConf, + Failed: true, + FailureReason: 
fmt.Sprintf("Corruption: %s", reading.GetCorruptionDetails()), + } + state.ReviewEntries = append(state.ReviewEntries, entry) + // Append to live HTML + if err := appendReviewEntry(entry); err != nil { + logMessage(LogFile, Warning, " Warning: Could not append to review HTML: %v", err) + } + + // Save thresholded raw frame (untouched, just thresholded) for replay/debugging + fileIOStart := time.Now() + saveThresholdedFrame(rawFrame, frameNum) + // Save layout visualization for debugging + saveLayoutVisualization(normalized, state.Layout, fmt.Sprintf("review/f%d_boxes.jpg", frameNum)) + timing.FileIO = time.Since(fileIOStart).Milliseconds() + + // Calculate total + timing.Total = time.Since(frameStartTime).Milliseconds() + + return ProcessingResult{ + Status: StatusCorrupted, + Reading: reading, + } + } + + // Check for negative values (unrecognized digits) - treat as low confidence + if reading.SpO2 < 0 || reading.HR < 0 { + validationStart := time.Now() + // Validation is just the comparison above + timing.Validation += time.Since(validationStart).Milliseconds() + + logMessage(LogFile, Info, " [UNRECOGNIZED] SpO2=%d, HR=%d - treating as low confidence", + reading.SpO2, reading.HR) + + // Save thresholded raw frame (untouched, just thresholded) for replay/debugging + fileIOStart := time.Now() + saveThresholdedFrame(rawFrame, frameNum) + // Save layout visualization for debugging + saveLayoutVisualization(normalized, state.Layout, fmt.Sprintf("review/f%d_boxes.jpg", frameNum)) + timing.FileIO = time.Since(fileIOStart).Milliseconds() + + // Add failed entry to review + entry := ReviewEntry{ + FrameNum: frameNum, + Timestamp: timestamp, + SpO2Value: reading.SpO2, + SpO2LeftDigit: reading.SpO2LeftDigit, + SpO2LeftConf: reading.SpO2LeftConf, + SpO2RightDigit: reading.SpO2RightDigit, + SpO2RightConf: reading.SpO2RightConf, + HRValue: reading.HR, + HRLeftDigit: reading.HRLeftDigit, + HRLeftConf: reading.HRLeftConf, + HRRightDigit: reading.HRRightDigit, + HRRightConf: 
reading.HRRightConf, + Failed: true, + FailureReason: fmt.Sprintf("Unrecognized: SpO2=%d, HR=%d", reading.SpO2, reading.HR), + } + state.ReviewEntries = append(state.ReviewEntries, entry) + // Append to live HTML + if err := appendReviewEntry(entry); err != nil { + logMessage(LogFile, Warning, " Warning: Could not append to review HTML: %v", err) + } + + // Calculate total + timing.Total = time.Since(frameStartTime).Milliseconds() + + return ProcessingResult{ + Status: StatusLowConfidence, + Reading: reading, + } + } + + // Check for value changes + validationStart := time.Now() + valuesChanged := (reading.SpO2 != state.LastPosted.SpO2 || reading.HR != state.LastPosted.HR) + timing.Validation += time.Since(validationStart).Milliseconds() + + if !valuesChanged { + // Values didn't change - no files to clean up (only created in DEBUG_MODE) + // Calculate total + timing.Total = time.Since(frameStartTime).Milliseconds() + + // Log processing time for unchanged frames + logMessage(LogFile, Debug, " Frame #%d: SpO2=%d%%, HR=%d bpm (no change) - processed in %dms", + frameNum, reading.SpO2, reading.HR, timing.Total) + + return ProcessingResult{ + Status: StatusNoChange, + Reading: reading, + } + } + + // Values changed - update last posted values + state.LastPosted = reading + + // Print to console and log (MAIN OUTPUT) + // Note: timing.Total will be calculated at the end, so don't print it here + logMessage(Both, Info, "SpO2=%d%%, HR=%d bpm", reading.SpO2, reading.HR) + + // Check confidence (Handler #2) + validationStart = time.Now() + action, _ := validateConfidence(&reading, state, p.logger) + timing.Validation += time.Since(validationStart).Milliseconds() + + if action == ActionRetry { + // Increment and display low confidence counter + state.LowConfidenceCount++ + logMessage(Both, Warning, " Low confidence (#%d) - grabbing next frame immediately...", state.LowConfidenceCount) + + // Save thresholded raw frame (untouched, just thresholded) for replay/debugging + 
fileIOStart2 := time.Now() + saveThresholdedFrame(rawFrame, frameNum) + // Save layout visualization for debugging + saveLayoutVisualization(normalized, state.Layout, fmt.Sprintf("review/f%d_boxes.jpg", frameNum)) + timing.FileIO += time.Since(fileIOStart2).Milliseconds() + + // Add failed entry to review + spo2Avg, hrAvg := reading.AvgConfidence() + entry := ReviewEntry{ + FrameNum: frameNum, + Timestamp: timestamp, + SpO2Value: reading.SpO2, + SpO2LeftDigit: reading.SpO2LeftDigit, + SpO2LeftConf: reading.SpO2LeftConf, + SpO2RightDigit: reading.SpO2RightDigit, + SpO2RightConf: reading.SpO2RightConf, + HRValue: reading.HR, + HRLeftDigit: reading.HRLeftDigit, + HRLeftConf: reading.HRLeftConf, + HRRightDigit: reading.HRRightDigit, + HRRightConf: reading.HRRightConf, + Failed: true, + FailureReason: fmt.Sprintf("Low confidence: SpO2 %.1f%%, HR %.1f%%", spo2Avg, hrAvg), + } + state.ReviewEntries = append(state.ReviewEntries, entry) + // Append to live HTML + if err := appendReviewEntry(entry); err != nil { + logMessage(LogFile, Warning, " Warning: Could not append to review HTML: %v", err) + } + + // Calculate total + timing.Total = time.Since(frameStartTime).Milliseconds() + + // Return status so main loop can handle escalation + return ProcessingResult{ + Status: StatusLowConfidence, + Reading: reading, + } + } else if action == ActionDiscard { + // Increment and display low confidence counter + state.LowConfidenceCount++ + logMessage(Both, Warning, " Low confidence after retry (#%d)", state.LowConfidenceCount) + + // Save thresholded raw frame (untouched, just thresholded) for replay/debugging + fileIOStart2 := time.Now() + saveThresholdedFrame(rawFrame, frameNum) + // Save layout visualization for debugging + saveLayoutVisualization(normalized, state.Layout, fmt.Sprintf("review/f%d_boxes.jpg", frameNum)) + timing.FileIO += time.Since(fileIOStart2).Milliseconds() + + // Add failed entry to review + spo2Avg, hrAvg := reading.AvgConfidence() + entry := ReviewEntry{ + 
FrameNum: frameNum, + Timestamp: timestamp, + SpO2Value: reading.SpO2, + SpO2LeftDigit: reading.SpO2LeftDigit, + SpO2LeftConf: reading.SpO2LeftConf, + SpO2RightDigit: reading.SpO2RightDigit, + SpO2RightConf: reading.SpO2RightConf, + HRValue: reading.HR, + HRLeftDigit: reading.HRLeftDigit, + HRLeftConf: reading.HRLeftConf, + HRRightDigit: reading.HRRightDigit, + HRRightConf: reading.HRRightConf, + Failed: true, + FailureReason: fmt.Sprintf("Low confidence: SpO2 %.1f%%, HR %.1f%%", spo2Avg, hrAvg), + } + state.ReviewEntries = append(state.ReviewEntries, entry) + // Append to live HTML + if err := appendReviewEntry(entry); err != nil { + logMessage(LogFile, Warning, " Warning: Could not append to review HTML: %v", err) + } + + // Calculate total + timing.Total = time.Since(frameStartTime).Milliseconds() + + return ProcessingResult{ + Status: StatusLowConfidence, + Reading: reading, + } + } + + // Check stability (Handler #3) + validationStart = time.Now() + action, reason := validateStability(&reading, state, p.logger) + timing.Validation += time.Since(validationStart).Milliseconds() + + switch action { + case ActionPost: + // Check for physiologically impossible values (< 40) + if reading.SpO2 < 40 || reading.HR < 40 { + timing.HASS = 0 // No HASS posting + + logMessage(Both, Warning, " Invalid physiological values: SpO2=%d, HR=%d (below 40) - not posting to HASS", + reading.SpO2, reading.HR) + // Add to review but don't post + entry := ReviewEntry{ + FrameNum: frameNum, + Timestamp: timestamp, + SpO2Value: reading.SpO2, + SpO2LeftDigit: reading.SpO2LeftDigit, + SpO2LeftConf: reading.SpO2LeftConf, + SpO2RightDigit: reading.SpO2RightDigit, + SpO2RightConf: reading.SpO2RightConf, + HRValue: reading.HR, + HRLeftDigit: reading.HRLeftDigit, + HRLeftConf: reading.HRLeftConf, + HRRightDigit: reading.HRRightDigit, + HRRightConf: reading.HRRightConf, + Unstable: false, + } + state.ReviewEntries = append(state.ReviewEntries, entry) + // Append to live HTML + if err := 
appendReviewEntry(entry); err != nil { + logMessage(LogFile, Warning, " Warning: Could not append to review HTML: %v", err) + } + + // Calculate total + timing.Total = time.Since(frameStartTime).Milliseconds() + + return ProcessingResult{ + Status: StatusSuccess, + Reading: reading, + ShouldPost: false, // Don't post invalid values + } + } + + // Add to review + entry := ReviewEntry{ + FrameNum: frameNum, + Timestamp: timestamp, + SpO2Value: reading.SpO2, + SpO2LeftDigit: reading.SpO2LeftDigit, + SpO2LeftConf: reading.SpO2LeftConf, + SpO2RightDigit: reading.SpO2RightDigit, + SpO2RightConf: reading.SpO2RightConf, + HRValue: reading.HR, + HRLeftDigit: reading.HRLeftDigit, + HRLeftConf: reading.HRLeftConf, + HRRightDigit: reading.HRRightDigit, + HRRightConf: reading.HRRightConf, + Unstable: false, + } + state.ReviewEntries = append(state.ReviewEntries, entry) + // Append to live HTML + if err := appendReviewEntry(entry); err != nil { + logMessage(LogFile, Warning, " Warning: Could not append to review HTML: %v", err) + } + + // HASS posting happens in main loop after we return + // Will be timed there + timing.HASS = 0 // Placeholder, will be updated in main loop + + // Calculate total (before HASS post) + timing.Total = time.Since(frameStartTime).Milliseconds() + + return ProcessingResult{ + Status: StatusSuccess, + Reading: reading, + ShouldPost: true, + } + + case ActionHold: + timing.HASS = 0 // No HASS posting + // Add to review with unstable marker + entry := ReviewEntry{ + FrameNum: frameNum, + Timestamp: timestamp, + SpO2Value: reading.SpO2, + SpO2LeftDigit: reading.SpO2LeftDigit, + SpO2LeftConf: reading.SpO2LeftConf, + SpO2RightDigit: reading.SpO2RightDigit, + SpO2RightConf: reading.SpO2RightConf, + HRValue: reading.HR, + HRLeftDigit: reading.HRLeftDigit, + HRLeftConf: reading.HRLeftConf, + HRRightDigit: reading.HRRightDigit, + HRRightConf: reading.HRRightConf, + Unstable: true, + UnstableReason: reason, + } + state.ReviewEntries = append(state.ReviewEntries, 
entry) + // Append to live HTML + if err := appendReviewEntry(entry); err != nil { + logMessage(LogFile, Warning, " Warning: Could not append to review HTML: %v", err) + } + + logMessage(LogFile, Warning, " %s - holding for validation", reason) + + // Calculate total + timing.Total = time.Since(frameStartTime).Milliseconds() + + return ProcessingResult{ + Status: StatusUnstable, + Reading: reading, + ShouldPost: false, + UnstableReason: reason, + } + + default: + timing.HASS = 0 // No HASS posting + timing.Total = time.Since(frameStartTime).Milliseconds() + + return ProcessingResult{ + Status: StatusSuccess, + Reading: reading, + } + } +} + +// postReading posts a reading to Home Assistant +func (p *Processor) postReading(reading *Reading, state *ProcessingState) error { + // Skip posting if no config (single-frame test mode) + if p.config == nil { + logMessage(LogFile, Info, " ā“˜ Skipping HASS post (test mode)") + return nil + } + + spo2Err := postToHomeAssistant(p.config, "sensor.pulse_ox_spo2", reading.SpO2, "%", "SpO2") + hrErr := postToHomeAssistant(p.config, "sensor.pulse_ox_hr", reading.HR, "bpm", "Heart Rate") + + if spo2Err == nil && hrErr == nil { + state.SuccessCount++ + logMessage(LogFile, Info, " āœ“ Posted successfully (success: %d, fail: %d)", + state.SuccessCount, state.FailureCount) + return nil + } + + state.FailureCount++ + if spo2Err != nil { + logMessage(LogFile, Error, " āŒ SpO2 post error: %v", spo2Err) + } + if hrErr != nil { + logMessage(LogFile, Error, " āŒ HR post error: %v", hrErr) + } + logMessage(LogFile, Info, " (success: %d, fail: %d)", state.SuccessCount, state.FailureCount) + logMessage(Both, Error, " āŒ Post failed") + + if spo2Err != nil { + return spo2Err + } + return hrErr +} + +// saveThresholdedFrame saves the RAW thresholded frame +// Frame is ALREADY thresholded binary (from acquisition), just save it directly +// Useful for debugging and can be tested with ./pulseox-monitor raw_frames/thresh_*.png +func 
saveThresholdedFrame(frame gocv.Mat, frameNum int) { + // Frame is already thresholded binary - just save it + filename := fmt.Sprintf("raw_frames/thresh_%s-%05d.png", time.Now().Format("20060102"), frameNum) + gocv.IMWrite(filename, frame) +} diff --git a/pulseox-monitor b/pulseox-monitor new file mode 100755 index 0000000..05d232d Binary files /dev/null and b/pulseox-monitor differ diff --git a/pulseox-monitor.go b/pulseox-monitor.go new file mode 100644 index 0000000..c3ee4ad --- /dev/null +++ b/pulseox-monitor.go @@ -0,0 +1,691 @@ +package main + +import ( + "fmt" + "image" + "os" + "os/signal" + "strings" + "syscall" + "time" + + "gocv.io/x/gocv" +) + +const VERSION = "v4.0-gemini" + +// Threshold for binary conversion - lower values capture fainter displays +const BINARY_THRESHOLD = 180 + +// Global debug flag +var DEBUG_MODE = false + +// Global timing flag +var TIMING_MODE = false + +func main() { + // Check for /help flag first + for _, arg := range os.Args[1:] { + if arg == "/help" || arg == "--help" || arg == "-h" { + showHelp() + return + } + } + + logMessage(Console, Info, "=== Pulse-Ox Monitor %s (Unified Detection) ===", VERSION) + logMessage(Console, Info, "") + + // Note: gocv.SetNumThreads(1) removed - not available in gocv v0.31.0 + // Small images (280x200px) don't benefit much from multi-threading anyway + logMessage(Console, Info, "šŸ”§ OpenCV initialized") + logMessage(Console, Info, "") + + // Check for flags + for _, arg := range os.Args[1:] { + if arg == "/debug" { + DEBUG_MODE = true + DEBUG = true // Also enable detection.go debug + logMessage(Console, Info, "šŸ› DEBUG MODE ENABLED") + } else if arg == "/timing" { + TIMING_MODE = true + logMessage(Console, Info, "ā±ļø TIMING MODE ENABLED") + } + } + + // Check if running in single-frame test mode + if len(os.Args) >= 2 { + // Filter out flags + framePath := "" + for _, arg := range os.Args[1:] { + if arg != "/debug" && arg != "/timing" { + framePath = arg + break + } + } + if 
framePath != "" { + runSingleFrameMode(framePath) + return + } + } + + // Normal streaming mode + runStreamingMode() +} + +func showHelp() { + logMessage(Console, Info, "=== Pulse-Ox Monitor %s ===", VERSION) + logMessage(Console, Info, "") + logMessage(Console, Info, "USAGE:") + logMessage(Console, Info, " pulseox-monitor [options] [framefile]") + logMessage(Console, Info, "") + logMessage(Console, Info, "OPTIONS:") + logMessage(Console, Info, " /help Show this help message") + logMessage(Console, Info, " /debug Enable debug mode (extra diagnostic output)") + logMessage(Console, Info, " /timing Show timing table for performance analysis") + logMessage(Console, Info, "") + logMessage(Console, Info, "MODES:") + logMessage(Console, Info, " No arguments - Run in streaming mode (RTSP camera)") + logMessage(Console, Info, " [framefile] - Test mode: process single PNG file") + logMessage(Console, Info, "") + logMessage(Console, Info, "EXAMPLES:") + logMessage(Console, Info, " pulseox-monitor # Normal streaming") + logMessage(Console, Info, " pulseox-monitor /debug # Streaming with debug output") + logMessage(Console, Info, " pulseox-monitor /timing # Show performance timing") + logMessage(Console, Info, " pulseox-monitor raw_frames/thresh_*.png # Test single frame") + logMessage(Console, Info, "") + logMessage(Console, Info, "OUTPUT:") + logMessage(Console, Info, " review/ - Processed frame images and review.html") + logMessage(Console, Info, " raw_frames/ - Failed recognition frames (for debugging)") + logMessage(Console, Info, " test_output/ - Layout detection debug images") + logMessage(Console, Info, " pulse-monitor_*.log - Detailed execution log") + logMessage(Console, Info, "") +} + +func runSingleFrameMode(framePath string) { + logMessage(Console, Info, "=== Single Frame Test Mode ===") + logMessage(Console, Info, "Loading frame: %s", framePath) + logMessage(Console, Info, "") + + // Automatically enable DEBUG_MODE for file processing + DEBUG_MODE = true + DEBUG = 
true // detection.go debug flag + logMessage(Console, Info, "šŸ› DEBUG MODE AUTO-ENABLED (file processing)") + logMessage(Console, Info, "") + + // In test mode, all output goes to console only (no separate file log) + // Leave globalLogger as nil to avoid duplication when using Both target + globalLogger = nil + + // Setup + os.MkdirAll("test_output", 0755) + os.MkdirAll("review", 0755) + + // Reset OCR cache + ResetOCRCache() + + // Create frame source + source := NewFileSource(framePath) + defer source.Close() + + // Create processor + processor := NewProcessor(nil, nil) + state := NewProcessingState() + + // Process single frame with unified loop + processFrames(source, processor, state) + + logMessage(Console, Info, "") + logMessage(Console, Info, "āœ“ Single frame test complete") + logMessage(Console, Info, "") + logMessage(Console, Info, "=== OUTPUT FILES ===") + logMessage(Console, Info, "Test outputs:") + logMessage(Console, Info, " test_output/layout_boxes.jpg - Layout visualization") + logMessage(Console, Info, " review/f*_spo2_full.png - SpO2 recognition") + logMessage(Console, Info, " review/f*_hr_full.png - HR recognition") +} + +func runStreamingMode() { + // Create log file + logFilename := fmt.Sprintf("pulse-monitor_%s.log", time.Now().Format("20060102_150405")) + logFile, err := os.OpenFile(logFilename, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) + if err != nil { + logMessage(Console, Warning, "Warning: Could not create log file: %v", err) + logMessage(Console, Info, "Continuing without file logging...") + logMessage(Console, Info, "") + globalLogger = nil + } else { + defer logFile.Close() + globalLogger = logFile + logMessage(Both, Info, "šŸ“ Logging to: %s", logFilename) + } + + // Setup directories - clean output directories in streaming mode + logMessage(Console, Info, "šŸ—‘ļø Cleaning output directories...") + + logMessage(Console, Info, " - review/... āœ“") + os.RemoveAll("review") + + logMessage(Console, Info, " - raw_frames/... 
āœ“") + os.RemoveAll("raw_frames") + + logMessage(Console, Info, " - test_output/... āœ“") + os.RemoveAll("test_output") + + os.MkdirAll("review", 0755) + os.MkdirAll("raw_frames", 0755) + os.MkdirAll("test_output", 0755) + logMessage(Console, Info, "") + + // Initialize live review HTML + if err := initReviewHTML(); err != nil { + logMessage(Console, Warning, "Warning: Could not initialize review HTML: %v", err) + } else { + logMessage(Console, Info, "āœ… Live review HTML initialized: review/review.html (refresh browser to see updates)") + } + + // Start review server + startReviewServer() + defer stopReviewServer() + + // Load config + config, err := LoadConfig("config.yaml") + if err != nil { + logMessage(Console, Error, "Error loading config: %v", err) + return + } + + // Reset OCR cache at startup + ResetOCRCache() + + // Initialize timestamp OCR client (reusable) + InitTimestampOCR() + defer CloseTimestampOCR() + + logMessage(Console, Info, "šŸ“Š All processed frames saved to review/") + logMessage(Console, Info, " Press Ctrl+C to stop and generate review.html") + logMessage(Console, Info, "") + + // Create RTSP source with reconnection handling + logMessage(Console, Info, "Connecting to RTSP stream...") + var source *RTSPSource + for { + source, err = NewRTSPSource(config.Camera.RTSPURL) + if err == nil { + break + } + logMessage(Console, Warning, "Failed to connect: %v", err) + logMessage(Console, Info, "Retrying in 5 seconds...") + time.Sleep(5 * time.Second) + } + defer source.Close() + + logMessage(Console, Info, "āœ“ Connected! 
Press Ctrl+C to stop") + logMessage(Console, Info, "Posting to HASS: %s", config.HomeAssistant.URL) + logMessage(Console, Info, "") + + // Send startup notification (but not if we already sent one recently) + if !recentStartupNotification() { + if err := sendNotification(config, "Pulse-Ox Monitor", "Monitoring started"); err != nil { + logMessage(Both, Warning, " Failed to send startup notification: %v", err) + } else { + markStartupNotification() + } + } + + // Setup signal handler + sigChan := make(chan os.Signal, 1) + signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM) + go func() { + <-sigChan + logMessage(Console, Info, "") + logMessage(Console, Info, "") + logMessage(Console, Info, "šŸ›‘ Received stop signal, finishing up...") + source.Close() // Safe to call multiple times now (sync.Once) + }() + + // Create processor + processor := NewProcessor(config, logFile) + state := NewProcessingState() + + // Run unified processing loop + processFrames(source, processor, state) + + // Show OCR stats + calls, cacheHits, tokens := GetOCRStats() + cost := EstimateCost() + logMessage(Console, Info, "") + logMessage(Console, Info, "šŸ“Š Gemini OCR Stats:") + logMessage(Console, Info, " API calls: %d", calls) + logMessage(Console, Info, " Cache hits: %d", cacheHits) + logMessage(Console, Info, " Total tokens: %d", tokens) + logMessage(Console, Info, " Estimated cost: $%.4f", cost) + + // Close review HTML + logMessage(Console, Info, "") + logMessage(Console, Info, "šŸ“ Closing review.html...") + if err := closeReviewHTML(); err != nil { + logMessage(Console, Error, "Error closing review HTML: %v", err) + } else { + logMessage(Console, Info, "āœ“ Review page completed: review/review.html (%d frames)", len(state.ReviewEntries)) + logMessage(Console, Info, " Open it in browser to review recognition results") + } +} + +// interruptibleSleep sleeps for the specified duration, but checks every second if source is closed +func interruptibleSleep(source FrameSource, duration 
time.Duration) { + remaining := duration + for remaining > 0 { + sleepTime := time.Second + if remaining < sleepTime { + sleepTime = remaining + } + time.Sleep(sleepTime) + remaining -= sleepTime + + // Check if source was closed (Ctrl+C pressed) + if !source.IsActive() { + return + } + } +} + +const startupNotificationFile = ".startup_notified" +const detectionFailNotificationFile = ".detection_fail_notified" +const missingTemplateNotificationFile = ".missing_template_notified" + +// recentStartupNotification checks if we sent a startup notification in the last hour +func recentStartupNotification() bool { + info, err := os.Stat(startupNotificationFile) + if err != nil { + return false // File doesn't exist + } + // Check if file is less than 1 hour old + return time.Since(info.ModTime()) < 1*time.Hour +} + +// markStartupNotification creates/updates the marker file +func markStartupNotification() { + os.WriteFile(startupNotificationFile, []byte(time.Now().Format(time.RFC3339)), 0644) +} + +// recentDetectionFailNotification checks if we sent a detection failure notification in the last hour +func recentDetectionFailNotification() bool { + info, err := os.Stat(detectionFailNotificationFile) + if err != nil { + return false + } + return time.Since(info.ModTime()) < 1*time.Hour +} + +// markDetectionFailNotification creates/updates the marker file +func markDetectionFailNotification() { + os.WriteFile(detectionFailNotificationFile, []byte(time.Now().Format(time.RFC3339)), 0644) +} + +// loadNotifiedMissingTemplates loads previously notified templates from file +func loadNotifiedMissingTemplates() map[int]bool { + result := make(map[int]bool) + data, err := os.ReadFile(missingTemplateNotificationFile) + if err != nil { + return result + } + for _, line := range strings.Split(string(data), "\n") { + var num int + if _, err := fmt.Sscanf(line, "%d", &num); err == nil { + result[num] = true + } + } + return result +} + +// markMissingTemplateNotified adds a template to 
the notified list +func markMissingTemplateNotified(template int) { + f, err := os.OpenFile(missingTemplateNotificationFile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + return + } + defer f.Close() + fmt.Fprintf(f, "%d\n", template) +} + +// prepareBaseFrame converts raw camera frame to canonical base frame +// This is the ONLY place where grayscale/threshold/crop/rotate happens +// Returns: base (binary thresholded) +func prepareBaseFrame(raw gocv.Mat) gocv.Mat { + // 1. Grayscale + gray := gocv.NewMat() + gocv.CvtColor(raw, &gray, gocv.ColorBGRToGray) + defer gray.Close() + + // 2. Threshold to binary + thresholded := gocv.NewMat() + gocv.Threshold(gray, &thresholded, BINARY_THRESHOLD, 255, gocv.ThresholdBinary) + + // 3. Crop timestamp area (top 80px) + if thresholded.Rows() > 80 { + cropped := thresholded.Region(image.Rect(0, 80, thresholded.Cols(), thresholded.Rows())) + base := cropped.Clone() + cropped.Close() + thresholded.Close() + thresholded = base + } + + // 4. 
Rotate 90 degrees clockwise
	rotated := gocv.NewMat()
	gocv.Rotate(thresholded, &rotated, gocv.Rotate90Clockwise)
	// The input Mat is consumed here: caller must not use thresholded afterwards.
	thresholded.Close()

	return rotated
}

// tryDetectionWithRetries attempts detection, fetching fresh frames internally if needed
// Returns the detection result only - does NOT return or modify frames
// maxRetries is the number of additional frames to try after the first failure
func tryDetectionWithRetries(source FrameSource, initialBinary gocv.Mat, maxRetries int) DetectionResult {
	// Try with the initial frame first
	result := DetectRotationAndWidth(initialBinary)
	if result.Success {
		return result
	}

	// Initial attempt failed - try a few more frames
	for retry := 1; retry <= maxRetries; retry++ {
		// Get a fresh frame
		frame, shouldContinue, err := source.Next()
		if err != nil || frame.Empty() || !shouldContinue {
			if !frame.Empty() {
				frame.Close()
			}
			return DetectionResult{Success: false}
		}

		// Prepare base frame (grayscale, threshold, crop, rotate)
		binary := prepareBaseFrame(frame)
		frame.Close()

		if binary.Empty() {
			// Timestamp extraction failed - skip this frame
			continue
		}

		// Try detection
		result = DetectRotationAndWidth(binary)
		binary.Close() // Detection works on its own copy, so we close this

		if result.Success {
			logMessage(Both, Info, " Detection succeeded on retry #%d", retry)
			return result
		}
	}

	// All retries failed
	return DetectionResult{Success: false}
}

// processFrames is the UNIFIED processing loop - works for both RTSP and single-frame
func processFrames(source FrameSource, processor *Processor, state *ProcessingState) {
	logMessage(Console, Info, "Processing frames from: %s", source.Name())
	logMessage(Console, Info, "")

	for {
		loopStart := time.Now()
		logMessage(Console, Debug, "[LOOP] Starting iteration")

		// Check if we're in a detection wait period
		if !state.DetectWaitUntil.IsZero() && time.Now().Before(state.DetectWaitUntil) {
			// Keep consuming frames to prevent buffer buildup
			frame, shouldContinue, _ := source.Next()
			if !frame.Empty() {
				frame.Close()
			}
			if !shouldContinue || !source.IsActive() {
				break
			}
			continue
		}
		// Clear wait if expired
		if !state.DetectWaitUntil.IsZero() {
			logMessage(Both, Info, "ā° Detection wait complete, resuming...")
			state.DetectWaitUntil = time.Time{}
			// Reset to stage 4 so we get 3 fast retries before next wait
			// (stages 1-6 are fast, stage 7+ are waits)
			state.DetectFailStage = 4
		}

		// Initialize timing data
		var timing TimingData
		acquireStart := time.Now()

		// Read raw frame from source
		var raw gocv.Mat
		var shouldContinue bool
		var err error

		if state.ConsecutiveFailures == 1 {
			// First failure: grab IMMEDIATE next frame (bypass skip)
			logMessage(Console, Debug, " Using NextImmediate() due to consecutive failure")
			raw, shouldContinue, err = source.NextImmediate()
		} else {
			// Normal operation or 2nd+ failure: use normal skip
			raw, shouldContinue, err = source.Next()
		}
		timing.Acquire = time.Since(acquireStart).Milliseconds()
		logMessage(Console, Debug, "[LOOP] Frame acquired in %dms", timing.Acquire)

		if err != nil {
			logMessage(Both, Error, "Error reading frame: %v", err)
			if !shouldContinue {
				break
			}
			continue
		}
		if raw.Empty() {
			if !shouldContinue {
				break
			}
			continue
		}

		// ========== PREPARE BASE FRAME ==========
		// This is the ONLY place grayscale/threshold/crop/rotate happens
		prepareStart := time.Now()
		base := prepareBaseFrame(raw)

		if base.Empty() {
			// Preprocessing failed - skip frame
			raw.Close()
			if !shouldContinue {
				break
			}
			continue
		}
		timing.Preprocess = time.Since(prepareStart).Milliseconds()

		// Check timestamp drift every 10 frames (BEFORE closing raw, on colored frame)
		state.ProcessedCount++
		state.TimestampCheckCounter++
		if state.TimestampCheckCounter >= 10 {
			state.TimestampCheckCounter = 0
			diff, _, err := extractTimestamp(raw)
			if err != nil {
				logMessage(Both, Warning, " Timestamp OCR failed: %v", err)
			} else if diff > 3 {
				logMessage(Both, Warning, " Camera timestamp lag: %ds behind server", diff)
			} else if diff < -3 {
				logMessage(Both, Warning, " Camera timestamp ahead: %ds ahead of server", -diff)
			}
			// Silent if drift is within ±3 seconds
		}

		raw.Close()

		// Save base frame for debugging (threshold to reduce file size - already done)
		var rawThresholded gocv.Mat
		if state.ProcessedCount == 1 {
			gocv.IMWrite("test_output/pre_threshold_gray.png", base)
			logMessage(Both, Info, "šŸ’¾ Saved base frame to test_output/pre_threshold_gray.png")
		}
		// Clone base for raw frame saving (used on errors)
		// Ownership: this loop iteration owns rawThresholded and must close it
		// on every exit path below.
		rawThresholded = base.Clone()

		// ========== DETECTION (if needed) ==========
		if state.NeedsDetection() {
			logMessage(Both, Info, "šŸ” Running detection...")

			// Try detection on this frame
			result := DetectRotationAndWidth(base)

			if !result.Success {
				// Detection failed - increment failure counter
				state.DetectFailStage++
				state.ConsecutiveFailures = 0 // Reset so we use normal frame interval, not NextImmediate
				base.Close()
				rawThresholded.Close()

				// Track when failures started
				if state.DetectionFailStart.IsZero() {
					state.DetectionFailStart = time.Now()
				}

				// Notify after 5 minutes of continuous detection failure (once per hour)
				if !state.NotifiedDetectionFailure && time.Since(state.DetectionFailStart) > 5*time.Minute {
					if !recentDetectionFailNotification() {
						state.NotifiedDetectionFailure = true
						if err := sendNotification(processor.config, "Pulse-Ox Monitor", "Detection failing for 5+ minutes - pulse-ox not visible?"); err != nil {
							logMessage(Both, Warning, " Failed to send notification: %v", err)
						} else {
							markDetectionFailNotification()
						}
					}
				}

				// Escalation ladder: stages 1-6 retry immediately, then
				// progressively longer waits (10s / 30s / 60s).
				switch state.DetectFailStage {
				case 1, 2, 3, 4, 5, 6:
					// Stages 1-6: Try immediately (~3 seconds at 2fps)
					logMessage(Both, Warning, " Detection failed (#%d) - trying next frame...", state.DetectFailStage)
				case 7:
					// Stage 7: Wait 10s
					state.DetectWaitUntil = time.Now().Add(10 * time.Second)
					logMessage(Both, Warning, " Detection failed (#%d) - waiting 10s...", state.DetectFailStage)
				case 8:
					// Stage 8: Wait 30s
					state.DetectWaitUntil = time.Now().Add(30 * time.Second)
					logMessage(Both, Warning, " Detection failed (#%d) - waiting 30s...", state.DetectFailStage)
				default:
					// Stage 9+: Wait 60s
					state.DetectWaitUntil = time.Now().Add(60 * time.Second)
					logMessage(Both, Warning, " Detection failed (#%d) - waiting 60s...", state.DetectFailStage)
				}

				if !shouldContinue {
					break
				}
				continue
			}

			// Detection succeeded - reset stage and store results
			if state.DetectFailStage > 0 {
				logMessage(Both, Info, "āœ… Detection recovered after %d attempts", state.DetectFailStage)
			}
			state.DetectFailStage = 0
			state.DetectionFailStart = time.Time{} // Reset failure tracking
			state.NotifiedDetectionFailure = false // Allow future notifications
			state.LockedRotation = result.Rotation
			state.LockedScale = result.ScaleFactor
			state.Layout = &ScreenLayout{
				SpO2Area: result.SpO2,
				HRArea:   result.HR,
			}
			state.LayoutValid = true
			state.ConsecutiveFailures = 0

			logMessage(Both, Info, "āœ“ Detection complete:")
			logMessage(Both, Info, " Rotation: %.3f°", result.Rotation)
			logMessage(Both, Info, " Scale: %.3f (width %dpx → 860px)", result.ScaleFactor, result.Width)
			logMessage(Both, Info, " SpO2: X[%d-%d] Y[%d-%d]",
				result.SpO2.Min.X, result.SpO2.Max.X, result.SpO2.Min.Y, result.SpO2.Max.Y)
			logMessage(Both, Info, " HR: X[%d-%d] Y[%d-%d]",
				result.HR.Min.X, result.HR.Max.X, result.HR.Min.Y, result.HR.Max.Y)
		}

		// ========== APPLY TRANSFORMS ==========
		// For LLM-OCR, we don't need scaling - just apply rotation if non-zero
		var scaled gocv.Mat
		if state.LockedRotation != 0 {
			// Only apply rotation, no scaling (scale factor = 1.0)
			scaled = applyTransforms(base, state.LockedRotation, 1.0)
			base.Close()
		} else {
			// No transforms needed - use base directly
			// NOTE: scaled now ALIASES base; it is closed exactly once below
			// via scaled.Close(), so base must not be closed separately here.
			scaled = base
		}

		// Validate frame dimensions vs layout coordinates
		if state.Layout != nil {
			logMessage(Both, Debug, "Frame size: %dx%d", scaled.Cols(), scaled.Rows())
			logMessage(Both, Debug, "SpO2 area: X[%d-%d] Y[%d-%d]",
				state.Layout.SpO2Area.Min.X, state.Layout.SpO2Area.Max.X,
				state.Layout.SpO2Area.Min.Y, state.Layout.SpO2Area.Max.Y)
			logMessage(Both, Debug, "HR area: X[%d-%d] Y[%d-%d]",
				state.Layout.HRArea.Min.X, state.Layout.HRArea.Max.X,
				state.Layout.HRArea.Min.Y, state.Layout.HRArea.Max.Y)

			// Check bounds
			if state.Layout.SpO2Area.Max.X > scaled.Cols() || state.Layout.SpO2Area.Max.Y > scaled.Rows() ||
				state.Layout.HRArea.Max.X > scaled.Cols() || state.Layout.HRArea.Max.Y > scaled.Rows() {
				logMessage(Both, Error, "Layout coordinates exceed frame dimensions! Resetting detection.")
				state.ResetDetection()
				scaled.Close()
				rawThresholded.Close()
				continue
			}
		}

		// ========== PROCESS FRAME ==========
		logMessage(Console, Debug, "[LOOP] Starting OCR")
		timing.FrameNum = state.ProcessedCount
		// NOTE(review): scaled and rawThresholded stay owned by this loop and are
		// closed right after — assumes processFrame does not retain either Mat;
		// confirm in processor.go.
		result := processor.processFrame(scaled, rawThresholded, state.ProcessedCount, state, &timing)
		logMessage(Console, Debug, "[LOOP] OCR complete, status=%d", result.Status)
		scaled.Close()
		rawThresholded.Close()

		// Print timing table (header every 20 frames)
		state.TimingFrameCount++
		showHeader := (state.TimingFrameCount % 20) == 1
		printTimingTable(timing, showHeader)

		// Handle result
		switch result.Status {
		case StatusSuccess:
			state.ConsecutiveFailures = 0
			state.LowConfidenceCount = 0 // Reset on success
			if result.ShouldPost {
				hassStart := time.Now()
				processor.postReading(&result.Reading, state)
				// NOTE(review): HASS time is added after printTimingTable already
				// ran above, so it shows up in totals only from the next print.
				timing.HASS = time.Since(hassStart).Milliseconds()
				timing.Total += timing.HASS
			}

		case StatusCorrupted:
			// Just log it - don't re-detect. Boxes are locked.
			state.ConsecutiveFailures++
			logMessage(LogFile, Warning, " Corrupted frame #%d", state.ConsecutiveFailures)

		case StatusLowConfidence:
			// Just logged in processor.go - we continue anyway, boxes are locked

		case StatusNoChange:
			// Normal - don't increment failure counter

		case StatusUnstable:
			// Held for validation - don't increment failure counter
		}

		if !shouldContinue {
			break
		}

		logMessage(Console, Debug, "[LOOP] Iteration complete in %dms", time.Since(loopStart).Milliseconds())
	}
}
diff --git a/review/review.html b/review/review.html
new file mode 100644
index 0000000..169e1fa
--- /dev/null
+++ b/review/review.html
@@ -0,0 +1,108 @@
+ + + + Pulse-Ox Recognition Review + + + + + +

Pulse-Ox Recognition Review (Live)

+

Tip: Type correct number in input box and press Enter to save as template

+

Refresh page to see latest frames...

+ + + + + +
FrameTimeSpO2 / HR
+

+ Color coding: + Green ≄95%, + Red <95% +

+ + \ No newline at end of file diff --git a/review_server.go b/review_server.go new file mode 100644 index 0000000..5d17f46 --- /dev/null +++ b/review_server.go @@ -0,0 +1,34 @@ +package main + +import ( + "net/http" +) + +var reviewServer *http.Server + +// startReviewServer starts the HTTP server for the review interface +func startReviewServer() { + mux := http.NewServeMux() + + // Serve static files from review directory + mux.Handle("/", http.FileServer(http.Dir("review"))) + + reviewServer = &http.Server{ + Addr: ":8085", + Handler: mux, + } + + go func() { + logMessage(Console, Info, "🌐 Review server running at http://localhost:8085/review.html") + if err := reviewServer.ListenAndServe(); err != http.ErrServerClosed { + logMessage(Console, Warning, "Review server error: %v", err) + } + }() +} + +// stopReviewServer gracefully stops the HTTP server +func stopReviewServer() { + if reviewServer != nil { + reviewServer.Close() + } +} diff --git a/test_output/pre_threshold_gray.png b/test_output/pre_threshold_gray.png new file mode 100644 index 0000000..90307ef Binary files /dev/null and b/test_output/pre_threshold_gray.png differ diff --git a/test_raw_frame.png b/test_raw_frame.png new file mode 100644 index 0000000..337a114 Binary files /dev/null and b/test_raw_frame.png differ diff --git a/test_rotation.go b/test_rotation.go new file mode 100644 index 0000000..8cc164f --- /dev/null +++ b/test_rotation.go @@ -0,0 +1,230 @@ +//go:build ignore +// +build ignore + +// This is a standalone test program for rotation detection. 
+// Run with: go run test_rotation.go detection.go + +package main + +import ( + "fmt" + "log" + "os" + + "gocv.io/x/gocv" +) + +// DEBUG flag for this test program (separate from main build) +// Note: When running this test, set DEBUG = true to enable debug output in detection.go +var DEBUG = false + +func main() { + // Parse command line args + if len(os.Args) > 1 && os.Args[1] == "/debug" { + DEBUG = true + fmt.Println("DEBUG mode enabled\n") + } + + // Hardcode RTSP URL + streamURL := "rtsp://tapohass:!!Helder06@192.168.2.183:554/stream1" + + fmt.Println("Rotation Detection System") + fmt.Println("========================") + fmt.Println("Processing frames...\n") + + // Open RTSP stream + stream, err := gocv.OpenVideoCapture(streamURL) + if err != nil { + log.Fatalf("Failed to open stream: %v", err) + } + defer stream.Close() + + // Initialize detection state + rotation := 9999.0 // Sentinel value for "not detected" + width := 0 // 0 means "not detected" + scaleFactor := 0.0 // Scale factor from detection + + // Frame processing counters + framesSkipped := 0 + framesProcessed := 0 + detectionAttempts := 0 + totalFramesRead := 0 + SKIP_COUNT := 7 // Process every 8th frame + MAX_DETECTION_ATTEMPTS := 100 // Give up after 100 attempts + RE_DETECT_AFTER := 0 // Re-detect after N frames (0 = disabled) + + // Main processing loop + frame := gocv.NewMat() + defer frame.Close() + + for { + // Acquire a frame + if ok := stream.Read(&frame); !ok || frame.Empty() { + log.Fatal("Failed to read frame from stream") + } + totalFramesRead++ + + // Preprocess frame to binary + binary := PreprocessFrame(frame) + + // If rotation is 9999 or width is 0, detect bands/width/rotation + if rotation == 9999 || width == 0 || scaleFactor == 0.0 { + detectionAttempts++ + + // Give up if too many attempts + if detectionAttempts > MAX_DETECTION_ATTEMPTS { + fmt.Printf("āŒ Detection failed after %d attempts. 
Exiting.\n", MAX_DETECTION_ATTEMPTS) + break + } + + result := DetectRotationAndWidth(binary) + if result.Success { + rotation = result.Rotation + width = result.Width + scaleFactor = result.ScaleFactor + fmt.Printf("āœ“ DETECTION SUCCESSFUL (frame %d, attempt #%d):\n", totalFramesRead, detectionAttempts) + fmt.Printf(" Width=%dpx, Rotation=%.3f°, Scale=%.3f\n\n", width, rotation, scaleFactor) + detectionAttempts = 0 // Reset counter + } else { + // Show detection attempts with frame info + if detectionAttempts <= 5 { + if DEBUG { + fmt.Printf("Frame %d: Detection attempt #%d failed\n", totalFramesRead, detectionAttempts) + } + } else { + fmt.Printf("⚠ Frame %d: Detection attempt #%d failed - retrying...\n", totalFramesRead, detectionAttempts) + } + // Clean up and continue to next frame + binary.Close() + + // Skip a few frames to ensure display has changed + for skip := 0; skip < 3; skip++ { + if ok := stream.Read(&frame); !ok { + break + } + } + continue + } + } + + // If rotation != 9999 and width != 0 and scaleFactor != 0.0 and we've skipped enough frames + if rotation != 9999 && width != 0 && scaleFactor != 0.0 && framesSkipped >= SKIP_COUNT { + framesProcessed++ + + // Apply the same rotation correction to this frame + // (rotation was detected on a previous frame, now apply it to this one) + rotated := RotateImage(binary, rotation) + defer rotated.Close() + + // Save rotated frame BEFORE scaling + if DEBUG && framesProcessed == 1 { + gocv.IMWrite("debug_frame_001_AFTER_ROTATION_BEFORE_SCALE.png", rotated) + fmt.Println("DEBUG: Saved rotated frame before scaling") + } + + // Apply the same scaling using detected scale factor + // This scales the entire frame so the baseline becomes 860px + scaled := ScaleByFactor(rotated, scaleFactor) + defer scaled.Close() + + fmt.Printf("Frame #%d processed: Rotated %.3f°, Scaled to %dx%d (factor: %.3f, baseline: %dpx -> 860px)\n", + framesProcessed, rotation, scaled.Cols(), scaled.Rows(), scaleFactor, width) + + // 
Re-detect bands on scaled image for visualization + if DEBUG && framesProcessed <= 5 { + // Use the detected scale factor for minimum height + scaledMinHeight := int(80.0 * scaleFactor) + bands := DetectBands(scaled, scaledMinHeight) + + if len(bands) >= 2 { + // Find tallest band (graph) + tallestIdx := 0 + tallestHeight := 0 + for i, band := range bands { + height := band.maxY - band.minY + if height > tallestHeight { + tallestHeight = height + tallestIdx = i + } + } + + if tallestIdx < len(bands)-1 { + graphBand := bands[tallestIdx] + digitBand := bands[tallestIdx+1] + + // Extract digit region + digitRegion := ExtractBandRegion(scaled, digitBand) + defer digitRegion.Close() + + // Find digit boxes + digitBoxes := FindDigitBoxes(digitRegion, scaledMinHeight) + + if DEBUG { + fmt.Printf(" DEBUG: FindDigitBoxes returned %d boxes:\n", len(digitBoxes)) + for i, box := range digitBoxes { + fmt.Printf(" Box %d: X[%d-%d] Y[%d-%d] Size: %dx%d\n", + i+1, box.Min.X, box.Max.X, box.Min.Y, box.Max.Y, + box.Dx(), box.Dy()) + } + } + + // Calculate center from scaled baseline points + // Use the same proportion as detected + scaledCenterX := int(float64(scaled.Cols()) / 2.0) + + // Merge into displays + displayAreas := mergeDigitBoxesWithCenter(digitRegion, digitBoxes, scaledCenterX) + + fmt.Println(" DEBUG: Saving visualizations...") + + // Visualize bands + VisualizeSimpleBands(scaled, graphBand, digitBand, + fmt.Sprintf("debug_frame_%03d_bands.png", framesProcessed)) + + // Save digit region + gocv.IMWrite(fmt.Sprintf("debug_frame_%03d_digits.png", framesProcessed), digitRegion) + + // Visualize digit boxes + VisualizeDigitBoxes(digitRegion, digitBoxes, digitBand.minY, + fmt.Sprintf("debug_frame_%03d_digitboxes.png", framesProcessed)) + + // Visualize merged displays + VisualizeDetectedDisplays(digitRegion, displayAreas, digitBoxes, scaledCenterX, + fmt.Sprintf("debug_frame_%03d_merged.png", framesProcessed)) + + fmt.Printf(" DEBUG: Saved 4 visualization files for frame 
%d\n", framesProcessed) + } + } + } + + // Save scaled frame if in DEBUG mode + if DEBUG && framesProcessed <= 5 { + filename := fmt.Sprintf("debug_frame_%03d_scaled.png", framesProcessed) + gocv.IMWrite(filename, scaled) + fmt.Printf("DEBUG: Saved %s\n", filename) + } + + // TODO: Add OCR processing on 'scaled' image here in later phase + // Display areas (SpO2 and HR) are already detected and stored in result + // They will be available when we integrate this with the main monitoring system + + framesSkipped = 0 + + // Optional: Force re-detection after N frames to adapt to changes + if RE_DETECT_AFTER > 0 && framesProcessed >= RE_DETECT_AFTER { + fmt.Printf("\nšŸ”„ Re-detecting after %d frames...\n", RE_DETECT_AFTER) + rotation = 9999 + width = 0 + scaleFactor = 0.0 + framesProcessed = 0 + } + } else { + framesSkipped++ + } + + // Clean up + binary.Close() + } + + fmt.Println("\nProgram terminated.") +} diff --git a/timestamp_ocr.go b/timestamp_ocr.go new file mode 100644 index 0000000..019ae87 --- /dev/null +++ b/timestamp_ocr.go @@ -0,0 +1,85 @@ +package main + +import ( + "fmt" + "image" + "strings" + "time" + + "github.com/otiai10/gosseract/v2" + "gocv.io/x/gocv" +) + +// Global reusable OCR client +var timestampOCRClient *gosseract.Client + +// InitTimestampOCR initializes the reusable OCR client +func InitTimestampOCR() { + timestampOCRClient = gosseract.NewClient() + timestampOCRClient.SetPageSegMode(gosseract.PSM_SINGLE_LINE) + timestampOCRClient.SetWhitelist("0123456789-: ") +} + +// CloseTimestampOCR closes the OCR client +func CloseTimestampOCR() { + if timestampOCRClient != nil { + timestampOCRClient.Close() + } +} + +// extractTimestamp extracts and OCRs the timestamp from the top of the frame +// Returns: (secondsDifference, ocrDurationMs, error) +func extractTimestamp(frame gocv.Mat) (int, int64, error) { + startTime := time.Now() + + // Extract 1024x68 region + timestampRegion := frame.Region(image.Rect(0, 0, 1024, 68)) + defer 
timestampRegion.Close()

	// Downscale 50% (reduces OCR processing time)
	downscaled := gocv.NewMat()
	defer downscaled.Close()
	gocv.Resize(timestampRegion, &downscaled, image.Point{512, 34}, 0, 0, gocv.InterpolationArea)

	// Grayscale
	gray := gocv.NewMat()
	defer gray.Close()
	gocv.CvtColor(downscaled, &gray, gocv.ColorBGRToGray)

	// Binary threshold (Otsu automatically finds best threshold)
	binary := gocv.NewMat()
	defer binary.Close()
	gocv.Threshold(gray, &binary, 0, 255, gocv.ThresholdBinary|gocv.ThresholdOtsu)

	// Invert (white text on black → black text on white for better OCR)
	inverted := gocv.NewMat()
	defer inverted.Close()
	gocv.BitwiseNot(binary, &inverted)

	// Encode as PNG (lossless, better for text)
	buf, err := gocv.IMEncode(".png", inverted)
	if err != nil {
		return 0, time.Since(startTime).Milliseconds(), fmt.Errorf("failed to encode: %w", err)
	}
	defer buf.Close()

	// OCR with REUSED client (major speed boost)
	// NOTE(review): the shared client has no lock; presumably only one
	// goroutine calls this — confirm before adding concurrent callers.
	err = timestampOCRClient.SetImageFromBytes(buf.GetBytes())
	if err != nil {
		return 0, time.Since(startTime).Milliseconds(), fmt.Errorf("failed to set image: %w", err)
	}

	text, err := timestampOCRClient.Text()
	if err != nil {
		return 0, time.Since(startTime).Milliseconds(), fmt.Errorf("OCR failed: %w", err)
	}

	// Parse timestamp
	text = strings.TrimSpace(strings.ReplaceAll(text, "\n", ""))
	cameraTime, err := time.ParseInLocation("2006-01-02 15:04:05", text, time.Local)
	if err != nil {
		return 0, time.Since(startTime).Milliseconds(), fmt.Errorf("could not parse timestamp from: '%s'", text)
	}

	// Positive result means the camera clock is behind the server clock;
	// negative means it is ahead.
	return int(time.Since(cameraTime).Seconds()), time.Since(startTime).Milliseconds(), nil
}
diff --git a/training_digits/122_1.png b/training_digits/122_1.png
new file mode 100644
index 0000000..76e49ec
Binary files /dev/null and b/training_digits/122_1.png differ
diff --git a/training_digits/129_1.png b/training_digits/129_1.png
new file mode 100644
index 0000000..94d28b0
Binary files
/dev/null and b/training_digits/129_1.png differ
diff --git a/training_digits/137_1.png b/training_digits/137_1.png
new file mode 100644
index 0000000..1d3a4df
Binary files /dev/null and b/training_digits/137_1.png differ
diff --git a/training_digits/44_1.png b/training_digits/44_1.png
new file mode 100644
index 0000000..d8c202b
Binary files /dev/null and b/training_digits/44_1.png differ
diff --git a/training_digits/76_1.png b/training_digits/76_1.png
new file mode 100644
index 0000000..bcd2884
Binary files /dev/null and b/training_digits/76_1.png differ
diff --git a/training_digits/94_1.png b/training_digits/94_1.png
new file mode 100644
index 0000000..701d499
Binary files /dev/null and b/training_digits/94_1.png differ
diff --git a/training_digits/97_1.png b/training_digits/97_1.png
new file mode 100644
index 0000000..0d28162
Binary files /dev/null and b/training_digits/97_1.png differ
diff --git a/types.go b/types.go
new file mode 100644
index 0000000..fb31482
--- /dev/null
+++ b/types.go
@@ -0,0 +1,251 @@
package main

import (
	"fmt"
	"time"

	"gocv.io/x/gocv"
)

// Global DEBUG flag for detection.go (set by pulseox-monitor.go)
var DEBUG = false

// ProcessingStatus represents the outcome of processing a frame
type ProcessingStatus int

const (
	StatusSuccess ProcessingStatus = iota
	StatusCorrupted
	StatusLowConfidence
	StatusNoChange
	StatusUnstable
)

// Reading holds SpO2 and HR values with confidence scores
type Reading struct {
	SpO2           int
	SpO2LeftDigit  int
	SpO2LeftConf   float64
	SpO2RightDigit int
	SpO2RightConf  float64
	HR             int
	HRLeftDigit    int
	HRLeftConf     float64
	HRRightDigit   int
	HRRightConf    float64
	Timestamp      string
	FrameNum       int
}

// AvgConfidence returns average confidence for SpO2 and HR
func (r *Reading) AvgConfidence() (spo2Avg, hrAvg float64) {
	spo2Avg = (r.SpO2LeftConf + r.SpO2RightConf) / 2.0
	hrAvg = (r.HRLeftConf + r.HRRightConf) / 2.0
	return
}

// IsCorrupted checks if any digit is marked as invalid (-1)
func (r *Reading) IsCorrupted() bool {
	return r.SpO2LeftDigit == -1 || r.SpO2RightDigit == -1 ||
		r.HRLeftDigit == -1 || r.HRRightDigit == -1
}

// GetCorruptionDetails returns a description of which digit(s) are corrupted
func (r *Reading) GetCorruptionDetails() string {
	corrupted := []string{}
	if r.SpO2LeftDigit == -1 {
		corrupted = append(corrupted, "SpO2 digit 2")
	}
	if r.SpO2RightDigit == -1 {
		corrupted = append(corrupted, "SpO2 digit 3")
	}
	if r.HRLeftDigit == -1 {
		corrupted = append(corrupted, "HR digit 2")
	}
	if r.HRRightDigit == -1 {
		corrupted = append(corrupted, "HR digit 3")
	}
	if len(corrupted) == 0 {
		return "unknown"
	}
	result := ""
	for i, c := range corrupted {
		if i > 0 {
			result += ", "
		}
		result += c
	}
	return result
}

// ProcessingResult contains the outcome of processing a single frame
type ProcessingResult struct {
	Status         ProcessingStatus
	Reading        Reading
	ShouldPost     bool
	UnstableReason string
}

// ProcessingState tracks all state across frame processing
type ProcessingState struct {
	// Detection state (sentinels: rotation=9999.0, scaleFactor=0.0)
	LockedRotation float64       // Rotation angle in degrees (9999 = not detected)
	LockedScale    float64       // Scale factor (0 = not detected)
	Layout         *ScreenLayout // SpO2 and HR display areas
	LayoutValid    bool          // True when detection has succeeded

	// Change detection
	LastPosted Reading

	// Stability tracking
	LastReading     Reading
	HasLastReading  bool
	PendingReading  *Reading
	BaselineReading Reading

	// Retry state
	InRetry             bool
	RetryCount          int
	MaxRetries          int
	ConsecutiveFailures int       // Track consecutive layout/processing failures
	LowConfStage        int       // Low confidence retry stage: 0=none, 1=fresh frame, 2=redetect, 3=10s, 4=30s, 5+=60s
	LowConfWaitUntil    time.Time // When to resume after low confidence wait
	DetectFailStage     int       // Detection failure stage: 0=none, 1-6=immediate, 7=10s, 8=30s, 9+=60s (matches the switch in processFrames)
	DetectWaitUntil     time.Time // When to resume after detection wait

	// Statistics
	FrameCount            int
	ProcessedCount        int
	SuccessCount          int
	FailureCount          int
	LowConfidenceCount    int
	TimestampCheckCounter int // Check timestamp every 10 processed frames

	// Review entries
	ReviewEntries []ReviewEntry

	// Timing tracking
	TimingFrameCount int // Count frames for periodic header

	// Previous display images for duplicate detection
	PrevSpO2Img    gocv.Mat
	PrevHRImg      gocv.Mat
	PrevSpO2Result int
	PrevHRResult   int
	PrevSpO2Conf   float64
	PrevHRConf     float64
	HasPrevImages  bool

	// Notification flood protection
	NotifiedMissingTemplates map[int]bool // Templates we've already warned about
	NotifiedDetectionFailure bool         // Already notified about detection failure
	DetectionFailStart       time.Time    // When detection failures started

	// OHLC ticker display
	TickStart    time.Time     // Start of current tick period
	TickDuration time.Duration // How long each tick lasts (default 5 min)
	SpO2Open     int           // First SpO2 in tick
	SpO2High     int           // Highest SpO2 in tick
	SpO2Low      int           // Lowest SpO2 in tick
	SpO2Close    int           // Current/last SpO2
	HROpen       int           // First HR in tick
	HRHigh       int           // Highest HR in tick
	HRLow        int           // Lowest HR in tick
	HRClose      int           // Current/last HR
	TickHasData  bool          // Whether we have any data in current tick
}

// NewProcessingState creates initial state
func NewProcessingState() *ProcessingState {
	return &ProcessingState{
		LockedRotation:           9999.0, // Sentinel: not detected
		LockedScale:              0.0,    // Sentinel: not detected
		LayoutValid:              false,
		MaxRetries:               1,
		LastPosted:               Reading{SpO2: -1, HR: -1},
		LastReading:              Reading{SpO2: -1, HR: -1},
		BaselineReading:          Reading{SpO2: -1, HR: -1},
		NotifiedMissingTemplates: make(map[int]bool),
		TickDuration:             5 * time.Minute,
	}
}

// UpdateOHLC updates the OHLC ticker values and prints the ticker line
func (s *ProcessingState) UpdateOHLC(spo2, hr int) {
	now := time.Now()

	// Check if we need to start a new tick
	if s.TickStart.IsZero() || now.Sub(s.TickStart) >= s.TickDuration {
		// Print newline if we had data in previous tick
		if s.TickHasData {
			fmt.Println() // Move to new line
		}
		// Start new tick
		s.TickStart = now
		s.SpO2Open, s.SpO2High, s.SpO2Low, s.SpO2Close = spo2, spo2, spo2, spo2
		s.HROpen, s.HRHigh, s.HRLow, s.HRClose = hr, hr, hr, hr
		s.TickHasData = true
	} else {
		// Update existing tick
		s.SpO2Close = spo2
		s.HRClose = hr
		if spo2 > s.SpO2High {
			s.SpO2High = spo2
		}
		if spo2 < s.SpO2Low {
			s.SpO2Low = spo2
		}
		if hr > s.HRHigh {
			s.HRHigh = hr
		}
		if hr < s.HRLow {
			s.HRLow = hr
		}
	}

	// Print ticker line (overwrite current line via carriage return)
	timestamp := now.Format("15:04:05")
	fmt.Printf("\r%s SpO2: %d/%d/%d/%d HR: %d/%d/%d/%d ",
		timestamp,
		s.SpO2Open, s.SpO2High, s.SpO2Low, s.SpO2Close,
		s.HROpen, s.HRHigh, s.HRLow, s.HRClose)
}

// ResetDetection resets detection state to trigger re-detection
func (s *ProcessingState) ResetDetection() {
	s.LockedRotation = 9999.0
	s.LockedScale = 0.0
	s.LayoutValid = false
	s.Layout = nil
}

// NeedsDetection returns true if detection needs to run
func (s *ProcessingState) NeedsDetection() bool {
	return s.LockedRotation == 9999.0 || s.LockedScale == 0.0 || !s.LayoutValid
}

// Action represents what to do next
type Action int

const (
	ActionContinue Action = iota // Continue to next frame
	ActionRetry                  // Retry with next frame + layout re-detection
	ActionPost                   // Post reading to Home Assistant
	ActionHold                   // Hold reading for validation
	ActionDiscard                // Discard reading
)

// TimingData holds timing measurements for a single frame
type TimingData struct {
	FrameNum   int
	Acquire    int64 // Frame acquisition (ms)
	Threshold  int64 // Grayscale + threshold (ms)
	Preprocess int64 // Crop + rotate (ms)
	Scale      int64 // Frame scaling (ms)
	OCR_SpO2   int64 // SpO2 recognition (ms)
	OCR_HR     int64 // HR recognition (ms)
	Validation int64 // Validation checks (ms)
	FileIO     int64 // Saving review images (ms)
	HASS       int64 // Home Assistant POST (ms)
	Total      int64 // Total processing time (ms)
}