immich-compare/build_mylio_db.py

178 lines
5.8 KiB
Python
Executable File

#!/usr/bin/env python3
"""Build a SQLite database from local Mylio backup with EXIF metadata."""
import subprocess
import sqlite3
import os
import json
import re
from pathlib import Path
DB_PATH = "/home/johan/immich-compare/mylio_index.db"
MYLIO_PATH = "/tank/mylio-backup/Mylio"
EXTENSIONS = {'.jpg', '.jpeg', '.png', '.heic', '.gif', '.mp4', '.mov', '.avi', '.m4v', '.3gp'}
def create_db():
if os.path.exists(DB_PATH):
os.remove(DB_PATH)
conn = sqlite3.connect(DB_PATH)
c = conn.cursor()
c.execute('''CREATE TABLE files (
id INTEGER PRIMARY KEY,
filename TEXT,
filepath TEXT,
filesize INTEGER,
year_folder TEXT,
date_original TEXT,
create_date TEXT,
make TEXT,
model TEXT,
software TEXT,
comment TEXT,
gps_lat REAL,
gps_lon REAL,
width INTEGER,
height INTEGER,
color_profile TEXT,
xmp_date TEXT
)''')
c.execute('CREATE INDEX idx_filename ON files(filename)')
c.execute('CREATE INDEX idx_filesize ON files(filesize)')
c.execute('CREATE INDEX idx_filename_size ON files(filename, filesize)')
c.execute('CREATE INDEX idx_date ON files(date_original)')
c.execute('CREATE INDEX idx_model ON files(model)')
c.execute('CREATE INDEX idx_software ON files(software)')
conn.commit()
return conn
def get_exif_batch(files):
"""Get EXIF data for multiple files using exiftool JSON output."""
if not files:
return []
cmd = ['exiftool', '-json', '-fast',
'-DateTimeOriginal', '-CreateDate', '-Make', '-Model',
'-Software', '-Comment', '-GPSLatitude', '-GPSLongitude',
'-ImageWidth', '-ImageHeight', '-ProfileDescription'] + files
try:
result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
if result.stdout:
return json.loads(result.stdout)
except:
pass
return []
def get_xmp_date(filepath):
"""Read DateTimeOriginal from XMP sidecar if exists."""
for xmp_path in [filepath + '.xmp', re.sub(r'\.[^.]+$', '.xmp', filepath)]:
if os.path.exists(xmp_path):
try:
with open(xmp_path, 'r', errors='ignore') as f:
content = f.read()
match = re.search(r'DateTimeOriginal="([^"]+)"', content)
if match:
return match.group(1)[:10]
except:
pass
return None
def main():
print("Building Mylio database from /tank/mylio-backup/Mylio...")
conn = create_db()
c = conn.cursor()
# Collect all files
all_files = []
for root, dirs, files in os.walk(MYLIO_PATH):
for fname in files:
ext = os.path.splitext(fname)[1].lower()
if ext in EXTENSIONS:
all_files.append(os.path.join(root, fname))
print(f"Found {len(all_files)} media files")
# Process in batches
batch_size = 100
count = 0
for i in range(0, len(all_files), batch_size):
batch = all_files[i:i+batch_size]
exif_data = get_exif_batch(batch)
# Create lookup by source file
exif_lookup = {}
for item in exif_data:
src = item.get('SourceFile', '')
exif_lookup[src] = item
for filepath in batch:
filename = os.path.basename(filepath)
filesize = os.path.getsize(filepath)
# Extract year folder
year_match = re.search(r'/Mylio/(\d{4})/', filepath)
year_folder = year_match.group(1) if year_match else None
# Get EXIF data
exif = exif_lookup.get(filepath, {})
date_original = exif.get('DateTimeOriginal', '')
if date_original:
date_original = str(date_original)[:10].replace(':', '-')
create_date = exif.get('CreateDate', '')
if create_date:
create_date = str(create_date)[:10].replace(':', '-')
# Get XMP date
xmp_date = get_xmp_date(filepath)
# GPS coordinates
gps_lat = exif.get('GPSLatitude')
gps_lon = exif.get('GPSLongitude')
if isinstance(gps_lat, str):
gps_lat = None
if isinstance(gps_lon, str):
gps_lon = None
c.execute('''INSERT INTO files
(filename, filepath, filesize, year_folder, date_original, create_date,
make, model, software, comment, gps_lat, gps_lon, width, height,
color_profile, xmp_date)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)''',
(filename, filepath, filesize, year_folder,
date_original or None, create_date or None,
exif.get('Make'), exif.get('Model'), exif.get('Software'),
exif.get('Comment'), gps_lat, gps_lon,
exif.get('ImageWidth'), exif.get('ImageHeight'),
exif.get('ProfileDescription'), xmp_date))
count += 1
if count % 1000 == 0:
print(f" Processed {count} files...")
conn.commit()
conn.commit()
# Print summary
c.execute("SELECT COUNT(*) FROM files")
total = c.fetchone()[0]
c.execute("SELECT COUNT(*) FROM files WHERE date_original IS NOT NULL OR xmp_date IS NOT NULL")
with_date = c.fetchone()[0]
c.execute("SELECT COUNT(DISTINCT model) FROM files WHERE model IS NOT NULL")
cameras = c.fetchone()[0]
print(f"\nDone! Created {DB_PATH}")
print(f" Total files: {total}")
print(f" Files with dates: {with_date}")
print(f" Unique cameras: {cameras}")
conn.close()
if __name__ == "__main__":
main()