#!/usr/bin/env python3
"""Scan 4TB Mylio folder and find files not in Immich."""

import hashlib
import os
import shlex
import sqlite3
import subprocess

# SSH host that is queried for the Mylio library.
REMOTE = "macmini"

# Directory on the remote host that is searched for media files.
REMOTE_PATH = "/Volumes/4TB/Mylio"

# Local SQLite database that is queried for the hashes already in Immich.
LOCAL_DB = "/home/johan/immich-compare/hash_index.db"

# File extensions (lower-case, with the leading dot) treated as media files.
EXTENSIONS = {
    ".jpg",
    ".jpeg",
    ".png",
    ".heic",
    ".gif",
    ".mp4",
    ".mov",
    ".avi",
    ".m4v",
    ".3gp",
}


def get_immich_hashes(db_path=None):
    """Load all Immich hashes from the existing hash-index database.

    Args:
        db_path: Path of the SQLite database to read. When omitted, the
            module-level ``LOCAL_DB`` path is used.

    Returns:
        A set holding the ``hash64k`` value of every row of the ``files``
        table whose ``source`` column equals ``'immich'``.

    Raises:
        sqlite3.Error: If the database cannot be queried (for example the
            ``files`` table does not exist).
    """
    if db_path is None:
        db_path = LOCAL_DB

    conn = sqlite3.connect(db_path)
    try:
        rows = conn.execute("SELECT hash64k FROM files WHERE source='immich'")
        hashes = {row[0] for row in rows}
    finally:
        # Close the connection even when the query raises.
        conn.close()

    print(f"Loaded {len(hashes)} Immich hashes")
    return hashes


def scan_remote():
    """Find all media files below ``REMOTE_PATH`` on the ``REMOTE`` host.

    Runs ``find`` over SSH and keeps only the paths whose extension,
    compared case-insensitively, is listed in ``EXTENSIONS``.

    Returns:
        A list of remote file paths, in the order ``find`` printed them.
        The list is empty when the remote command produced no output.
    """
    print(f"\nFinding files on {REMOTE}:{REMOTE_PATH}...")

    # ssh hands the command string to the remote shell, so the path is quoted
    # for that shell. Passing argv as a list avoids a second, local shell and
    # the quoting/injection problems of an interpolated shell string.
    remote_cmd = f"find {shlex.quote(REMOTE_PATH)} -type f 2>/dev/null"
    result = subprocess.run(
        ["ssh", REMOTE, remote_cmd],
        capture_output=True,
        text=True,
    )

    # find may exit non-zero after a partial listing, so output is still used;
    # only a failure that produced nothing at all is called out.
    if result.returncode != 0 and not result.stdout:
        print(
            f"WARNING: remote find failed (exit {result.returncode}): "
            f"{result.stderr.strip()}"
        )

    # Empty lines have no extension and are therefore filtered out as well.
    files = [
        line
        for line in result.stdout.split("\n")
        if os.path.splitext(line)[1].lower() in EXTENSIONS
    ]

    print(f"Found {len(files)} media files")
    return files


def hash_remote_file(filepath):
    """Get the MD5 hash of the first 64 KiB of a remote file.

    Args:
        filepath: Path of the file on the ``REMOTE`` host.

    Returns:
        The MD5 hex digest of up to the first 65536 bytes of the file, or
        ``None`` when the remote read failed (ssh exited non-zero).
    """
    # The path is quoted for the remote shell that ssh runs the command in.
    remote_cmd = f"head -c 65536 {shlex.quote(filepath)} 2>/dev/null"
    result = subprocess.run(["ssh", REMOTE, remote_cmd], capture_output=True)

    # Hash locally rather than piping into md5sum: a shell pipeline reports
    # only md5sum's exit status, which hides ssh/head failures and returns the
    # digest of empty input for files that could not be read.
    if result.returncode != 0:
        return None
    return hashlib.md5(result.stdout).hexdigest()


def main():
    """Compare the remote Mylio files against Immich and report the gaps.

    Loads the known Immich hashes, lists the remote media files, hashes each
    one and sorts it into "matched" (hash known to Immich) or "missing".
    The missing paths are written, one per line, to
    ``/tmp/4tb_mylio_missing.txt``.

    Returns:
        The list of remote paths whose hash was not found among the Immich
        hashes. Files that could not be hashed are in neither category.
    """
    immich_hashes = get_immich_hashes()
    files = scan_remote()

    missing = []
    matched = 0
    unreadable = 0  # files for which hash_remote_file() returned None

    print("\nHashing files and comparing...")
    for position, filepath in enumerate(files, start=1):
        # Progress line every 100 files, printed before hashing that file.
        if position % 100 == 0:
            print(f" {position}/{len(files)} - {matched} matched, {len(missing)} missing")

        digest = hash_remote_file(filepath)
        if digest is None:
            # Count the failure instead of dropping the file silently.
            unreadable += 1
            continue

        if digest in immich_hashes:
            matched += 1
        else:
            missing.append(filepath)

    print(f"\n{'='*50}")
    print("Results:")
    print(f" Total scanned: {len(files)}")
    print(f" Already in Immich: {matched}")
    print(f" NOT in Immich: {len(missing)}")
    if unreadable:
        print(f" Could not hash: {unreadable}")

    # Save missing list
    with open('/tmp/4tb_mylio_missing.txt', 'w') as f:
        f.writelines(f"{path}\n" for path in missing)

    print("\nMissing files saved to /tmp/4tb_mylio_missing.txt")
    return missing


# Run the comparison only when this file is executed as a script.
if __name__ == "__main__":
    main()