#!/usr/bin/env python3
"""Scan 4TB Mylio folder and find files not in Immich.

The script lists every media file under REMOTE_PATH on the REMOTE host
(over ssh), computes the MD5 of the first 64 KiB of each file, and compares
that digest with the ``hash64k`` values stored for ``source='immich'`` in
the local SQLite index.  Paths whose digest is unknown are written to
MISSING_LIST_PATH, one per line.
"""

import subprocess
import hashlib
import shlex
import sqlite3
import os

REMOTE = "macmini"
REMOTE_PATH = "/Volumes/4TB/Mylio"
LOCAL_DB = "/home/johan/immich-compare/hash_index.db"
MISSING_LIST_PATH = "/tmp/4tb_mylio_missing.txt"

# Number of leading bytes of each file that go into the hash.
HASH_PREFIX_BYTES = 65536

EXTENSIONS = {'.jpg', '.jpeg', '.png', '.heic', '.gif',
              '.mp4', '.mov', '.avi', '.m4v', '.3gp'}


def get_immich_hashes(db_path=LOCAL_DB):
    """Load all Immich hashes from existing database.

    Args:
        db_path: Path of the SQLite index; defaults to LOCAL_DB.

    Returns:
        A set with the ``hash64k`` value of every row in ``files`` whose
        ``source`` is ``'immich'``.

    Raises:
        sqlite3.Error: If the database cannot be opened or queried.
    """
    conn = sqlite3.connect(db_path)
    try:
        rows = conn.execute("SELECT hash64k FROM files WHERE source='immich'")
        hashes = {row[0] for row in rows}
    finally:
        # Close even when the query fails so the handle is never leaked.
        conn.close()
    print(f"Loaded {len(hashes)} Immich hashes")
    return hashes


def filter_media_files(paths):
    """Return the non-empty entries of *paths* that have a media extension.

    The extension comparison is case-insensitive and uses EXTENSIONS.
    Input order is preserved.
    """
    return [
        path for path in paths
        if path and os.path.splitext(path)[1].lower() in EXTENSIONS
    ]


def scan_remote():
    """Find all media files on remote 4TB Mylio.

    Runs ``find`` on REMOTE over ssh and keeps the paths whose extension is
    in EXTENSIONS.

    Returns:
        A list of remote file paths (strings), in the order ``find``
        printed them.
    """
    print(f"\nFinding files on {REMOTE}:{REMOTE_PATH}...")

    # ssh hands its command string to the remote shell, so the path must be
    # quoted for that shell; locally an argv list avoids a second shell.
    remote_cmd = f"find {shlex.quote(REMOTE_PATH)} -type f 2>/dev/null"
    result = subprocess.run(
        ["ssh", REMOTE, remote_cmd],
        stdin=subprocess.DEVNULL,
        capture_output=True,
        text=True,
    )

    # ssh itself exits with 255 on connection/authentication errors.  A
    # non-zero status from find (e.g. unreadable directories) is tolerated,
    # because the listing it produced is still usable.
    if result.returncode == 255:
        print(f"Warning: ssh to {REMOTE} failed: {result.stderr.strip()}")

    # No strip() on the whole output: it could trim whitespace that belongs
    # to the first or last filename.  Empty lines are dropped by the filter.
    files = filter_media_files(result.stdout.split('\n'))

    print(f"Found {len(files)} media files")
    return files


def hash_remote_file(filepath):
    """Get 64KB hash of remote file.

    Args:
        filepath: Absolute path of the file on REMOTE.

    Returns:
        The hex MD5 digest of the first HASH_PREFIX_BYTES bytes of the
        file, or None when the remote read failed (ssh or ``head`` exited
        with a non-zero status).
    """
    remote_cmd = f"head -c {HASH_PREFIX_BYTES} {shlex.quote(filepath)}"
    result = subprocess.run(
        ["ssh", REMOTE, remote_cmd],
        # Keep ssh from consuming the script's own stdin.
        stdin=subprocess.DEVNULL,
        capture_output=True,
    )
    # Check the status of the remote read itself.  Hashing locally (rather
    # than piping into md5sum) means a failed read can no longer be
    # mistaken for a valid hash of empty input.
    if result.returncode != 0:
        return None
    return hashlib.md5(result.stdout).hexdigest()


def main():
    """Compare the remote Mylio files against the Immich hash index.

    Prints progress and a summary, writes the paths that are not in Immich
    to MISSING_LIST_PATH, and returns them.

    Returns:
        A list of remote paths whose 64 KiB hash is not in the Immich set.
    """
    immich_hashes = get_immich_hashes()
    files = scan_remote()

    missing = []
    matched = 0
    failed = 0

    print("\nHashing files and comparing...")
    for i, filepath in enumerate(files):
        if (i + 1) % 100 == 0:
            print(f"  {i+1}/{len(files)} - {matched} matched, {len(missing)} missing")

        h = hash_remote_file(filepath)
        if h is None:
            # Unreadable files are neither matched nor missing; count them
            # so the summary adds up.
            failed += 1
            continue

        if h in immich_hashes:
            matched += 1
        else:
            missing.append(filepath)

    print(f"\n{'='*50}")
    print("Results:")
    print(f"  Total scanned:     {len(files)}")
    print(f"  Already in Immich: {matched}")
    print(f"  NOT in Immich:     {len(missing)}")
    if failed:
        print(f"  Failed to hash:    {failed}")

    # Save missing list
    with open(MISSING_LIST_PATH, 'w', encoding='utf-8') as f:
        f.writelines(p + '\n' for p in missing)
    print(f"\nMissing files saved to {MISSING_LIST_PATH}")

    return missing


if __name__ == "__main__":
    main()