#!/usr/bin/env python3
"""Build a SQLite database from local Mylio backup with EXIF metadata."""

import json
import os
import re
import sqlite3
import subprocess
import sys
from pathlib import Path

DB_PATH = "/home/johan/immich-compare/mylio_index.db"
MYLIO_PATH = "/tank/mylio-backup/Mylio"

EXTENSIONS = {'.jpg', '.jpeg', '.png', '.heic', '.gif',
              '.mp4', '.mov', '.avi', '.m4v', '.3gp'}

# Compiled once; used for every sidecar that is read.
_XMP_DATE_RE = re.compile(r'DateTimeOriginal="([^"]+)"')
_YEAR_FOLDER_RE = re.compile(r'/Mylio/(\d{4})/')

_INSERT_SQL = '''INSERT INTO files
    (filename, filepath, filesize, year_folder, date_original, create_date,
     make, model, software, comment, gps_lat, gps_lon, width, height,
     color_profile, xmp_date)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)'''


def create_db(db_path=None):
    """Create a fresh index database and return an open connection to it.

    Any existing file at ``db_path`` is deleted first, so every run starts
    from an empty ``files`` table.

    Args:
        db_path: Location of the SQLite file; defaults to ``DB_PATH``.

    Returns:
        An open ``sqlite3.Connection`` with the schema and indexes created.
    """
    if db_path is None:
        db_path = DB_PATH

    # Remove directly instead of exists()+remove() to avoid a check/use race.
    try:
        os.remove(db_path)
    except FileNotFoundError:
        pass

    conn = sqlite3.connect(db_path)
    c = conn.cursor()
    c.execute('''CREATE TABLE files (
        id INTEGER PRIMARY KEY,
        filename TEXT,
        filepath TEXT,
        filesize INTEGER,
        year_folder TEXT,
        date_original TEXT,
        create_date TEXT,
        make TEXT,
        model TEXT,
        software TEXT,
        comment TEXT,
        gps_lat REAL,
        gps_lon REAL,
        width INTEGER,
        height INTEGER,
        color_profile TEXT,
        xmp_date TEXT
    )''')
    c.execute('CREATE INDEX idx_filename ON files(filename)')
    c.execute('CREATE INDEX idx_filesize ON files(filesize)')
    c.execute('CREATE INDEX idx_filename_size ON files(filename, filesize)')
    c.execute('CREATE INDEX idx_date ON files(date_original)')
    c.execute('CREATE INDEX idx_model ON files(model)')
    c.execute('CREATE INDEX idx_software ON files(software)')
    conn.commit()
    return conn


def get_exif_batch(files):
    """Get EXIF data for multiple files using exiftool JSON output.

    This is best-effort: if exiftool is missing, times out, or produces
    output that cannot be parsed, a warning is written to stderr and an
    empty list is returned so the caller can still index the files.

    Args:
        files: List of file paths to pass to a single exiftool invocation.

    Returns:
        A list of dicts as decoded from exiftool's JSON output (one per
        file it reported on), or ``[]`` on empty input or failure.
    """
    if not files:
        return []

    # The '#' suffix disables print conversion for the GPS tags only, so
    # coordinates come back as signed decimal degrees instead of text such
    # as `52 deg 22' 30.00" N`, which cannot be stored in a REAL column.
    cmd = ['exiftool', '-json', '-fast',
           '-DateTimeOriginal', '-CreateDate', '-Make', '-Model',
           '-Software', '-Comment', '-GPSLatitude#', '-GPSLongitude#',
           '-ImageWidth', '-ImageHeight', '-ProfileDescription'] + files

    try:
        result = subprocess.run(cmd, capture_output=True, text=True,
                                encoding='utf-8', errors='replace',
                                timeout=120)
        if not result.stdout:
            return []
        data = json.loads(result.stdout)
    except (OSError, subprocess.SubprocessError, ValueError) as err:
        # OSError: exiftool not installed / not executable.
        # SubprocessError: includes TimeoutExpired.
        # ValueError: includes json.JSONDecodeError.
        print(f"  Warning: exiftool failed for a batch of {len(files)} files "
              f"({type(err).__name__}: {str(err)[:200]})", file=sys.stderr)
        return []

    return data if isinstance(data, list) else []


def get_xmp_date(filepath):
    """Read DateTimeOriginal from XMP sidecar if exists.

    Two sidecar names are tried in order: ``<file>.<ext>.xmp`` and
    ``<file>.xmp``. The first sidecar containing a
    ``DateTimeOriginal="..."`` attribute wins.

    Args:
        filepath: Path of the media file whose sidecar should be inspected.

    Returns:
        The first 10 characters of the attribute value (the date part of
        an ISO timestamp), or ``None`` if no sidecar provides one.
    """
    candidates = [filepath + '.xmp',
                  os.path.splitext(filepath)[0] + '.xmp']
    for xmp_path in candidates:
        try:
            with open(xmp_path, 'r', encoding='utf-8', errors='ignore') as f:
                content = f.read()
        except OSError:
            # Missing or unreadable sidecar: try the next candidate.
            continue
        match = _XMP_DATE_RE.search(content)
        if match:
            return match.group(1)[:10]
    return None


def _collect_media_files(root_dir):
    """Return paths of all files under root_dir whose extension is in EXTENSIONS."""
    found = []
    for root, _dirs, names in os.walk(root_dir):
        found.extend(
            os.path.join(root, name)
            for name in names
            if os.path.splitext(name)[1].lower() in EXTENSIONS
        )
    return found


def _normalize_date(value):
    """Turn an EXIF 'YYYY:MM:DD hh:mm:ss' value into 'YYYY-MM-DD', or None if empty."""
    if not value:
        return None
    return str(value)[:10].replace(':', '-')


def _to_float(value):
    """Return value as a float when it is numeric (or a numeric string), else None."""
    if isinstance(value, bool):
        return None
    if isinstance(value, (int, float)):
        return float(value)
    if isinstance(value, str):
        try:
            return float(value)
        except ValueError:
            return None
    return None


def _to_text(value):
    """Coerce a decoded JSON value to something sqlite3 can bind to a TEXT column."""
    if value is None:
        return None
    if isinstance(value, (list, tuple)):
        return ', '.join(str(item) for item in value)
    if isinstance(value, dict):
        return json.dumps(value)
    return str(value)


def _gps_value(exif, tag):
    """Fetch a numeric GPS coordinate from an exiftool record.

    The key is looked up both with and without the '#' suffix so the result
    does not depend on how exiftool names the tag in its JSON output.
    """
    value = exif.get(tag + '#')
    if value is None:
        value = exif.get(tag)
    return _to_float(value)


def _build_row(filepath, exif):
    """Build the parameter tuple for _INSERT_SQL, or None if the file is unreadable."""
    try:
        filesize = os.path.getsize(filepath)
    except OSError as err:
        # The file may have been removed or become unreadable since the walk.
        print(f"  Warning: skipping {filepath}: {err}", file=sys.stderr)
        return None

    year_match = _YEAR_FOLDER_RE.search(filepath)
    year_folder = year_match.group(1) if year_match else None

    return (
        os.path.basename(filepath),
        filepath,
        filesize,
        year_folder,
        _normalize_date(exif.get('DateTimeOriginal', '')),
        _normalize_date(exif.get('CreateDate', '')),
        _to_text(exif.get('Make')),
        _to_text(exif.get('Model')),
        _to_text(exif.get('Software')),
        _to_text(exif.get('Comment')),
        _gps_value(exif, 'GPSLatitude'),
        _gps_value(exif, 'GPSLongitude'),
        exif.get('ImageWidth'),
        exif.get('ImageHeight'),
        _to_text(exif.get('ProfileDescription')),
        get_xmp_date(filepath),
    )


def main(mylio_path=None, db_path=None):
    """Index every media file under the Mylio backup into a new SQLite database.

    Files are sent to exiftool in batches of 100, one row is inserted per
    file, the transaction is committed every 1000 rows, and a short summary
    is printed at the end.

    Args:
        mylio_path: Directory tree to scan; defaults to ``MYLIO_PATH``.
        db_path: SQLite file to (re)create; defaults to ``DB_PATH``.
    """
    if mylio_path is None:
        mylio_path = MYLIO_PATH
    if db_path is None:
        db_path = DB_PATH

    print(f"Building Mylio database from {mylio_path}...")

    conn = create_db(db_path)
    try:
        c = conn.cursor()

        all_files = _collect_media_files(mylio_path)
        print(f"Found {len(all_files)} media files")

        batch_size = 100
        count = 0

        for start in range(0, len(all_files), batch_size):
            batch = all_files[start:start + batch_size]

            # exiftool echoes each input path back as 'SourceFile'.
            exif_lookup = {
                item.get('SourceFile', ''): item
                for item in get_exif_batch(batch)
                if isinstance(item, dict)
            }

            for filepath in batch:
                row = _build_row(filepath, exif_lookup.get(filepath, {}))
                if row is None:
                    continue
                c.execute(_INSERT_SQL, row)

                count += 1
                if count % 1000 == 0:
                    print(f"  Processed {count} files...")
                    conn.commit()

        conn.commit()

        # Print summary
        c.execute("SELECT COUNT(*) FROM files")
        total = c.fetchone()[0]

        c.execute("SELECT COUNT(*) FROM files "
                  "WHERE date_original IS NOT NULL OR xmp_date IS NOT NULL")
        with_date = c.fetchone()[0]

        c.execute("SELECT COUNT(DISTINCT model) FROM files "
                  "WHERE model IS NOT NULL")
        cameras = c.fetchone()[0]

        print(f"\nDone! Created {db_path}")
        print(f"  Total files: {total}")
        print(f"  Files with dates: {with_date}")
        print(f"  Unique cameras: {cameras}")
    finally:
        # Close the connection even if indexing fails part-way through.
        conn.close()


if __name__ == "__main__":
    main()