immich-compare/scan_4tb_mylio.py

87 lines
2.5 KiB
Python

#!/usr/bin/env python3
"""Scan 4TB Mylio folder and find files not in Immich."""
import hashlib
import os
import shlex
import sqlite3
import subprocess
# Host name passed to ssh for every remote command.
REMOTE = "macmini"
# Directory on the remote host that is searched recursively with `find`.
REMOTE_PATH = "/Volumes/4TB/Mylio"
# SQLite database whose `files` table holds the already-indexed hashes.
LOCAL_DB = "/home/johan/immich-compare/hash_index.db"
# File extensions (compared in lower case) that count as media files.
EXTENSIONS = {'.jpg', '.jpeg', '.png', '.heic', '.gif', '.mp4', '.mov', '.avi', '.m4v', '.3gp'}
def get_immich_hashes():
    """Load all Immich hashes from the existing database.

    Opens the SQLite database at ``LOCAL_DB`` and collects the ``hash64k``
    column of every row in the ``files`` table whose ``source`` is
    ``'immich'``.

    Returns:
        set: The distinct ``hash64k`` values found.

    Raises:
        sqlite3.Error: If the database cannot be opened or the query fails.
    """
    conn = sqlite3.connect(LOCAL_DB)
    try:
        rows = conn.execute("SELECT hash64k FROM files WHERE source='immich'")
        hashes = {row[0] for row in rows}
    finally:
        # Close even when the query raises so the handle is never leaked.
        conn.close()
    print(f"Loaded {len(hashes)} Immich hashes")
    return hashes
def scan_remote():
    """Find all media files on the remote 4TB Mylio folder.

    Runs ``find`` over ssh on ``REMOTE`` below ``REMOTE_PATH`` and keeps the
    paths whose extension, compared in lower case, is in ``EXTENSIONS``.
    Errors printed by ``find`` itself are discarded on the remote side, so
    unreadable sub-directories are skipped silently.

    Returns:
        list[str]: Remote paths of the media files, in ``find`` output order.
    """
    print(f"\nFinding files on {REMOTE}:{REMOTE_PATH}...")
    # The remote shell still parses this string, so the path is quoted for it;
    # passing an argument list avoids a second parse by a local shell.
    remote_cmd = f"find {shlex.quote(REMOTE_PATH)} -type f 2>/dev/null"
    result = subprocess.run(
        ["ssh", REMOTE, remote_cmd], capture_output=True, text=True
    )
    # ssh uses exit status 255 for its own errors (e.g. host unreachable);
    # without this hint a failed connection looks like an empty library.
    if result.returncode == 255:
        print(f"Warning: ssh to {REMOTE} failed: {result.stderr.strip()}")
    files = [
        line
        for line in result.stdout.strip().split('\n')
        if line and os.path.splitext(line)[1].lower() in EXTENSIONS
    ]
    print(f"Found {len(files)} media files")
    return files
def hash_remote_file(filepath):
    """Get the MD5 hash of the first 64 KB of a remote file.

    The bytes are read with ``head`` over ssh and hashed locally. Hashing
    locally (instead of piping into ``md5sum``) keeps ssh's own exit status
    visible, so a failed read is reported as ``None`` rather than as the
    digest of empty input.

    Args:
        filepath: Path of the file on ``REMOTE``.

    Returns:
        str | None: Hex MD5 digest of up to the first 65536 bytes, or
        ``None`` when ssh or ``head`` exited with a non-zero status.
    """
    # Quote for the remote shell so names containing quotes, spaces or `$`
    # are passed through literally; no local shell is involved.
    remote_cmd = f"head -c 65536 {shlex.quote(filepath)} 2>/dev/null"
    result = subprocess.run(["ssh", REMOTE, remote_cmd], capture_output=True)
    if result.returncode != 0:
        return None
    return hashlib.md5(result.stdout).hexdigest()
def main():
    """Compare the remote Mylio media files against the Immich hash index.

    Loads the known Immich hashes, lists the remote media files, hashes each
    one and sorts it into "matched" or "missing". A summary is printed and
    the missing paths are written, one per line, to
    ``/tmp/4tb_mylio_missing.txt``.

    Returns:
        list[str]: Remote paths whose hash is not among the Immich hashes.
    """
    immich_hashes = get_immich_hashes()
    files = scan_remote()
    missing = []
    matched = 0
    failed = 0  # files that could not be hashed; reported so totals add up
    print("\nHashing files and comparing...")
    for position, filepath in enumerate(files, start=1):
        # Progress line every 100 files, printed before hashing that file.
        if position % 100 == 0:
            print(f" {position}/{len(files)} - {matched} matched, {len(missing)} missing")
        h = hash_remote_file(filepath)
        if h is None:
            failed += 1
            continue
        if h in immich_hashes:
            matched += 1
        else:
            missing.append(filepath)
    print(f"\n{'='*50}")
    print("Results:")
    print(f" Total scanned: {len(files)}")
    print(f" Already in Immich: {matched}")
    print(f" NOT in Immich: {len(missing)}")
    if failed:
        print(f" Failed to hash: {failed}")
    # Save missing list
    out_path = '/tmp/4tb_mylio_missing.txt'
    with open(out_path, 'w') as f:
        f.writelines(p + '\n' for p in missing)
    print(f"\nMissing files saved to {out_path}")
    return missing
# Run the comparison only when executed as a script, not when imported.
if __name__ == "__main__":
    main()