178 lines
5.8 KiB
Python
Executable File
178 lines
5.8 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""Build a SQLite database from local Mylio backup with EXIF metadata."""
|
|
|
|
import subprocess
|
|
import sqlite3
|
|
import os
|
|
import json
|
|
import re
|
|
from pathlib import Path
|
|
|
|
DB_PATH = "/home/johan/immich-compare/mylio_index.db"
|
|
MYLIO_PATH = "/tank/mylio-backup/Mylio"
|
|
|
|
EXTENSIONS = {'.jpg', '.jpeg', '.png', '.heic', '.gif', '.mp4', '.mov', '.avi', '.m4v', '.3gp'}
|
|
|
|
def create_db():
|
|
if os.path.exists(DB_PATH):
|
|
os.remove(DB_PATH)
|
|
|
|
conn = sqlite3.connect(DB_PATH)
|
|
c = conn.cursor()
|
|
c.execute('''CREATE TABLE files (
|
|
id INTEGER PRIMARY KEY,
|
|
filename TEXT,
|
|
filepath TEXT,
|
|
filesize INTEGER,
|
|
year_folder TEXT,
|
|
date_original TEXT,
|
|
create_date TEXT,
|
|
make TEXT,
|
|
model TEXT,
|
|
software TEXT,
|
|
comment TEXT,
|
|
gps_lat REAL,
|
|
gps_lon REAL,
|
|
width INTEGER,
|
|
height INTEGER,
|
|
color_profile TEXT,
|
|
xmp_date TEXT
|
|
)''')
|
|
c.execute('CREATE INDEX idx_filename ON files(filename)')
|
|
c.execute('CREATE INDEX idx_filesize ON files(filesize)')
|
|
c.execute('CREATE INDEX idx_filename_size ON files(filename, filesize)')
|
|
c.execute('CREATE INDEX idx_date ON files(date_original)')
|
|
c.execute('CREATE INDEX idx_model ON files(model)')
|
|
c.execute('CREATE INDEX idx_software ON files(software)')
|
|
conn.commit()
|
|
return conn
|
|
|
|
def get_exif_batch(files):
|
|
"""Get EXIF data for multiple files using exiftool JSON output."""
|
|
if not files:
|
|
return []
|
|
|
|
cmd = ['exiftool', '-json', '-fast',
|
|
'-DateTimeOriginal', '-CreateDate', '-Make', '-Model',
|
|
'-Software', '-Comment', '-GPSLatitude', '-GPSLongitude',
|
|
'-ImageWidth', '-ImageHeight', '-ProfileDescription'] + files
|
|
|
|
try:
|
|
result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
|
|
if result.stdout:
|
|
return json.loads(result.stdout)
|
|
except:
|
|
pass
|
|
return []
|
|
|
|
def get_xmp_date(filepath):
|
|
"""Read DateTimeOriginal from XMP sidecar if exists."""
|
|
for xmp_path in [filepath + '.xmp', re.sub(r'\.[^.]+$', '.xmp', filepath)]:
|
|
if os.path.exists(xmp_path):
|
|
try:
|
|
with open(xmp_path, 'r', errors='ignore') as f:
|
|
content = f.read()
|
|
match = re.search(r'DateTimeOriginal="([^"]+)"', content)
|
|
if match:
|
|
return match.group(1)[:10]
|
|
except:
|
|
pass
|
|
return None
|
|
|
|
def main():
|
|
print("Building Mylio database from /tank/mylio-backup/Mylio...")
|
|
conn = create_db()
|
|
c = conn.cursor()
|
|
|
|
# Collect all files
|
|
all_files = []
|
|
for root, dirs, files in os.walk(MYLIO_PATH):
|
|
for fname in files:
|
|
ext = os.path.splitext(fname)[1].lower()
|
|
if ext in EXTENSIONS:
|
|
all_files.append(os.path.join(root, fname))
|
|
|
|
print(f"Found {len(all_files)} media files")
|
|
|
|
# Process in batches
|
|
batch_size = 100
|
|
count = 0
|
|
|
|
for i in range(0, len(all_files), batch_size):
|
|
batch = all_files[i:i+batch_size]
|
|
exif_data = get_exif_batch(batch)
|
|
|
|
# Create lookup by source file
|
|
exif_lookup = {}
|
|
for item in exif_data:
|
|
src = item.get('SourceFile', '')
|
|
exif_lookup[src] = item
|
|
|
|
for filepath in batch:
|
|
filename = os.path.basename(filepath)
|
|
filesize = os.path.getsize(filepath)
|
|
|
|
# Extract year folder
|
|
year_match = re.search(r'/Mylio/(\d{4})/', filepath)
|
|
year_folder = year_match.group(1) if year_match else None
|
|
|
|
# Get EXIF data
|
|
exif = exif_lookup.get(filepath, {})
|
|
|
|
date_original = exif.get('DateTimeOriginal', '')
|
|
if date_original:
|
|
date_original = str(date_original)[:10].replace(':', '-')
|
|
|
|
create_date = exif.get('CreateDate', '')
|
|
if create_date:
|
|
create_date = str(create_date)[:10].replace(':', '-')
|
|
|
|
# Get XMP date
|
|
xmp_date = get_xmp_date(filepath)
|
|
|
|
# GPS coordinates
|
|
gps_lat = exif.get('GPSLatitude')
|
|
gps_lon = exif.get('GPSLongitude')
|
|
if isinstance(gps_lat, str):
|
|
gps_lat = None
|
|
if isinstance(gps_lon, str):
|
|
gps_lon = None
|
|
|
|
c.execute('''INSERT INTO files
|
|
(filename, filepath, filesize, year_folder, date_original, create_date,
|
|
make, model, software, comment, gps_lat, gps_lon, width, height,
|
|
color_profile, xmp_date)
|
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)''',
|
|
(filename, filepath, filesize, year_folder,
|
|
date_original or None, create_date or None,
|
|
exif.get('Make'), exif.get('Model'), exif.get('Software'),
|
|
exif.get('Comment'), gps_lat, gps_lon,
|
|
exif.get('ImageWidth'), exif.get('ImageHeight'),
|
|
exif.get('ProfileDescription'), xmp_date))
|
|
|
|
count += 1
|
|
|
|
if count % 1000 == 0:
|
|
print(f" Processed {count} files...")
|
|
conn.commit()
|
|
|
|
conn.commit()
|
|
|
|
# Print summary
|
|
c.execute("SELECT COUNT(*) FROM files")
|
|
total = c.fetchone()[0]
|
|
c.execute("SELECT COUNT(*) FROM files WHERE date_original IS NOT NULL OR xmp_date IS NOT NULL")
|
|
with_date = c.fetchone()[0]
|
|
c.execute("SELECT COUNT(DISTINCT model) FROM files WHERE model IS NOT NULL")
|
|
cameras = c.fetchone()[0]
|
|
|
|
print(f"\nDone! Created {DB_PATH}")
|
|
print(f" Total files: {total}")
|
|
print(f" Files with dates: {with_date}")
|
|
print(f" Unique cameras: {cameras}")
|
|
|
|
conn.close()
|
|
|
|
if __name__ == "__main__":
|
|
main()
|