commit bad69654149d74d7ea869620c72f99ed191b6ae2 Author: Johan Date: Sun Feb 1 02:05:06 2026 -0500 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5c5b5fc --- /dev/null +++ b/.gitignore @@ -0,0 +1,21 @@ +# Databases +*.db +*.sqlite + +# Python +__pycache__/ +*.py[cod] +*$py.class +.Python +venv/ +.env + +# Data files +*.csv +*.txt +!requirements.txt + +# IDE +.idea/ +.vscode/ +*.swp diff --git a/build_hash_index.py b/build_hash_index.py new file mode 100755 index 0000000..8d65fd1 --- /dev/null +++ b/build_hash_index.py @@ -0,0 +1,119 @@ +#!/usr/bin/env python3 +"""Build hash index using first 64KB of files for fast duplicate detection.""" + +import sqlite3 +import hashlib +import os +import sys +from pathlib import Path + +DB_PATH = "/home/johan/immich-compare/hash_index.db" + +EXTENSIONS = {'.jpg', '.jpeg', '.png', '.heic', '.gif', '.mp4', '.mov', '.avi', '.m4v', '.3gp'} + +def hash_file_head(filepath, size=65536): + """Hash first 64KB of file.""" + try: + with open(filepath, 'rb') as f: + return hashlib.md5(f.read(size)).hexdigest() + except: + return None + +def create_db(): + if os.path.exists(DB_PATH): + os.remove(DB_PATH) + + conn = sqlite3.connect(DB_PATH) + c = conn.cursor() + c.execute('''CREATE TABLE files ( + id INTEGER PRIMARY KEY, + source TEXT, + filename TEXT, + filepath TEXT, + filesize INTEGER, + hash64k TEXT + )''') + c.execute('CREATE INDEX idx_hash ON files(hash64k)') + c.execute('CREATE INDEX idx_source ON files(source)') + c.execute('CREATE INDEX idx_filename ON files(filename)') + c.execute('CREATE INDEX idx_hash_source ON files(hash64k, source)') + conn.commit() + return conn + +def scan_directory(conn, source_name, base_path): + """Scan a directory and add files to database.""" + c = conn.cursor() + count = 0 + + for root, dirs, files in os.walk(base_path): + for fname in files: + ext = os.path.splitext(fname)[1].lower() + if ext not in EXTENSIONS: + continue + + filepath = os.path.join(root, fname) + try: + filesize = os.path.getsize(filepath) + if filesize == 0: + continue + hash64k = hash_file_head(filepath) + if hash64k: + c.execute('INSERT INTO files (source, filename, filepath, filesize, hash64k) VALUES (?, ?, ?, ?, ?)', + (source_name, fname, filepath, filesize, hash64k)) + count += 1 + if count % 1000 == 0: + print(f" {source_name}: {count} files...") + conn.commit() + except Exception as e: + pass + + conn.commit() + print(f" {source_name}: {count} files total") + return count + +def main(): + print("Building hash index for all sources...") + conn = create_db() + + sources = [ + ("mylio", "/tank/mylio-backup/Mylio"), + ("immich", "/tank/immich/library/library/admin"), + ("takeout", "/tank/staging/Data/Google Takeout 20210421/Takeout/Google Photos"), + ] + + total = 0 + for name, path in sources: + if os.path.exists(path): + print(f"\nScanning {name}: {path}") + total += scan_directory(conn, name, path) + else: + print(f"\nSkipping {name}: {path} not found") + + # Summary + c = conn.cursor() + print(f"\n{'='*50}") + print(f"Total files indexed: {total}") + + for name, _ in sources: + c.execute("SELECT COUNT(*) FROM files WHERE source = ?", (name,)) + cnt = c.fetchone()[0] + print(f" {name}: {cnt}") + + # Find duplicates + c.execute(""" + SELECT hash64k, COUNT(*) as cnt, GROUP_CONCAT(DISTINCT source) as sources + FROM files + GROUP BY hash64k + HAVING COUNT(DISTINCT source) > 1 + LIMIT 10 + """) + + print(f"\nSample cross-source duplicates:") + for row in c.fetchall(): + print(f" {row[0][:12]}... appears in: {row[2]}") + + conn.close() + print(f"\nDone! Database: {DB_PATH}") + +if __name__ == "__main__": + main() diff --git a/build_mylio_db.py b/build_mylio_db.py new file mode 100755 index 0000000..f49cf3c --- /dev/null +++ b/build_mylio_db.py @@ -0,0 +1,177 @@ +#!/usr/bin/env python3 +"""Build a SQLite database from local Mylio backup with EXIF metadata.""" + +import subprocess +import sqlite3 +import os +import json +import re +from pathlib import Path + +DB_PATH = "/home/johan/immich-compare/mylio_index.db" +MYLIO_PATH = "/tank/mylio-backup/Mylio" + +EXTENSIONS = {'.jpg', '.jpeg', '.png', '.heic', '.gif', '.mp4', '.mov', '.avi', '.m4v', '.3gp'} + +def create_db(): + if os.path.exists(DB_PATH): + os.remove(DB_PATH) + + conn = sqlite3.connect(DB_PATH) + c = conn.cursor() + c.execute('''CREATE TABLE files ( + id INTEGER PRIMARY KEY, + filename TEXT, + filepath TEXT, + filesize INTEGER, + year_folder TEXT, + date_original TEXT, + create_date TEXT, + make TEXT, + model TEXT, + software TEXT, + comment TEXT, + gps_lat REAL, + gps_lon REAL, + width INTEGER, + height INTEGER, + color_profile TEXT, + xmp_date TEXT + )''') + c.execute('CREATE INDEX idx_filename ON files(filename)') + c.execute('CREATE INDEX idx_filesize ON files(filesize)') + c.execute('CREATE INDEX idx_filename_size ON files(filename, filesize)') + c.execute('CREATE INDEX idx_date ON files(date_original)') + c.execute('CREATE INDEX idx_model ON files(model)') + c.execute('CREATE INDEX idx_software ON files(software)') + conn.commit() + return conn + +def get_exif_batch(files): + """Get EXIF data for multiple files using exiftool JSON output.""" + if not files: + return [] + + cmd = ['exiftool', '-json', '-fast', + '-DateTimeOriginal', '-CreateDate', '-Make', '-Model', + '-Software', '-Comment', '-GPSLatitude', '-GPSLongitude', + '-ImageWidth', '-ImageHeight', '-ProfileDescription'] + files + + try: + result = subprocess.run(cmd, capture_output=True, text=True, timeout=120) + if result.stdout: + return json.loads(result.stdout) + except: + pass + return [] + +def get_xmp_date(filepath): + """Read DateTimeOriginal from XMP sidecar if exists.""" + for xmp_path in [filepath + '.xmp', re.sub(r'\.[^.]+$', '.xmp', filepath)]: + if os.path.exists(xmp_path): + try: + with open(xmp_path, 'r', errors='ignore') as f: + content = f.read() + match = re.search(r'DateTimeOriginal="([^"]+)"', content) + if match: + return match.group(1)[:10] + except: + pass + return None + +def main(): + print("Building Mylio database from /tank/mylio-backup/Mylio...") + conn = create_db() + c = conn.cursor() + + # Collect all files + all_files = [] + for root, dirs, files in os.walk(MYLIO_PATH): + for fname in files: + ext = os.path.splitext(fname)[1].lower() + if ext in EXTENSIONS: + all_files.append(os.path.join(root, fname)) + + print(f"Found {len(all_files)} media files") + + # Process in batches + batch_size = 100 + count = 0 + + for i in range(0, len(all_files), batch_size): + batch = all_files[i:i+batch_size] + exif_data = get_exif_batch(batch) + + # Create lookup by source file + exif_lookup = {} + for item in exif_data: + src = item.get('SourceFile', '') + exif_lookup[src] = item + + for filepath in batch: + filename = os.path.basename(filepath) + filesize = os.path.getsize(filepath) + + # Extract year folder + year_match = re.search(r'/Mylio/(\d{4})/', filepath) + year_folder = year_match.group(1) if year_match else None + + # Get EXIF data + exif = exif_lookup.get(filepath, {}) + + date_original = exif.get('DateTimeOriginal', '') + if date_original: + date_original = str(date_original)[:10].replace(':', '-') + + create_date = exif.get('CreateDate', '') + if create_date: + create_date = str(create_date)[:10].replace(':', '-') + + # Get XMP date + xmp_date = get_xmp_date(filepath) + + # GPS coordinates + gps_lat = exif.get('GPSLatitude') + gps_lon = exif.get('GPSLongitude') + if isinstance(gps_lat, str): + gps_lat = None + if isinstance(gps_lon, str): + gps_lon = None + + c.execute('''INSERT INTO files + (filename, filepath, filesize, year_folder, date_original, create_date, + make, model, software, comment, gps_lat, gps_lon, width, height, + color_profile, xmp_date) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)''', + (filename, filepath, filesize, year_folder, + date_original or None, create_date or None, + exif.get('Make'), exif.get('Model'), exif.get('Software'), + exif.get('Comment'), gps_lat, gps_lon, + exif.get('ImageWidth'), exif.get('ImageHeight'), + exif.get('ProfileDescription'), xmp_date)) + + count += 1 + + if count % 1000 == 0: + print(f" Processed {count} files...") + conn.commit() + + conn.commit() + + # Print summary + c.execute("SELECT COUNT(*) FROM files") + total = c.fetchone()[0] + c.execute("SELECT COUNT(*) FROM files WHERE date_original IS NOT NULL OR xmp_date IS NOT NULL") + with_date = c.fetchone()[0] + c.execute("SELECT COUNT(DISTINCT model) FROM files WHERE model IS NOT NULL") + cameras = c.fetchone()[0] + + print(f"\nDone! Created {DB_PATH}") + print(f" Total files: {total}") + print(f" Files with dates: {with_date}") + print(f" Unique cameras: {cameras}") + + conn.close() + +if __name__ == "__main__": + main() diff --git a/check-corruption.py b/check-corruption.py new file mode 100755 index 0000000..0be456c --- /dev/null +++ b/check-corruption.py @@ -0,0 +1,190 @@ +#!/usr/bin/env python3 +""" +Check for corrupted files in Immich by comparing database records with actual file sizes on disk. +""" + +import json +import os +import subprocess +import sys +from pathlib import Path + +# Configuration +IMMICH_API_URL = "http://localhost:2283/api" +IMMICH_API_KEY = "GsWQUTR6EXlkKp1M82jDJ3KmzhM0fMAbbIbfHDyI" +IMMICH_LIBRARY_PATH = "/tank/immich/library" + +# Minimum sizes for real media files (bytes) +MIN_SIZES = { + 'IMAGE': 10_000, # 10 KB - real photos are larger + 'VIDEO': 100_000, # 100 KB - real videos are larger +} + +# Suspiciously small threshold +SUSPICIOUS_SIZE = 50_000 # 50 KB + + +def get_all_assets(): + """Fetch all assets from Immich API.""" + print("Fetching assets from Immich API...") + + all_assets = [] + page = 1 + page_size = 1000 + + while True: + cmd = [ + "curl", "-s", + "-H", f"x-api-key: {IMMICH_API_KEY}", + "-H", "Content-Type: application/json", + f"{IMMICH_API_URL}/search/metadata", + "-d", json.dumps({"size": page_size, "page": page}) + ] + + result = subprocess.run(cmd, capture_output=True, text=True) + data = json.loads(result.stdout) + + items = data.get('assets', {}).get('items', []) + if not items: + break + + all_assets.extend(items) + print(f" Fetched {len(all_assets)} assets...") + + if len(items) < page_size: + break + page += 1 + + print(f" Total: {len(all_assets)} assets") + return all_assets + + +def check_file_on_disk(original_path: str) -> tuple[bool, int]: + """ + Check if file exists on disk and get its size. + Returns (exists, size_bytes) + """ + # Convert container path to host path + # /data/library/... -> /tank/immich/library/... + disk_path = original_path.replace("/data/library", IMMICH_LIBRARY_PATH + "/library") + disk_path = original_path.replace("/data/", IMMICH_LIBRARY_PATH + "/") + + path = Path(disk_path) + if path.exists(): + return True, path.stat().st_size + return False, 0 + + +def format_size(size_bytes: int) -> str: + """Format bytes as human-readable size.""" + if size_bytes < 1024: + return f"{size_bytes} B" + elif size_bytes < 1024 * 1024: + return f"{size_bytes / 1024:.1f} KB" + elif size_bytes < 1024 * 1024 * 1024: + return f"{size_bytes / (1024 * 1024):.1f} MB" + else: + return f"{size_bytes / (1024 * 1024 * 1024):.1f} GB" + + +def analyze_assets(assets: list) -> dict: + """Analyze all assets and categorize issues.""" + + results = { + 'missing': [], # File doesn't exist on disk + 'corrupted': [], # File exists but suspiciously small + 'no_thumbnail': [], # File exists but no thumbnail generated + 'ok': [], # File looks fine + } + + print("\nAnalyzing files on disk...") + + for i, asset in enumerate(assets): + if (i + 1) % 500 == 0: + print(f" Checked {i + 1}/{len(assets)}...") + + asset_id = asset.get('id') + filename = asset.get('originalFileName', 'unknown') + original_path = asset.get('originalPath', '') + asset_type = asset.get('type', 'IMAGE') + thumbhash = asset.get('thumbhash') + + exists, disk_size = check_file_on_disk(original_path) + + info = { + 'id': asset_id, + 'filename': filename, + 'path': original_path, + 'type': asset_type, + 'disk_size': disk_size, + 'disk_size_human': format_size(disk_size), + 'has_thumbnail': thumbhash is not None, + } + + if not exists: + results['missing'].append(info) + elif disk_size < SUSPICIOUS_SIZE: + results['corrupted'].append(info) + elif not thumbhash: + results['no_thumbnail'].append(info) + else: + results['ok'].append(info) + + return results + + +def main(): + # Get all assets + assets = get_all_assets() + + # Analyze + results = analyze_assets(assets) + + # Report + print("\n" + "=" * 70) + print("RESULTS") + print("=" * 70) + print(f"Total assets: {len(assets):,}") + print(f"OK: {len(results['ok']):,}") + print(f"Missing from disk: {len(results['missing']):,}") + print(f"Corrupted (tiny): {len(results['corrupted']):,}") + print(f"No thumbnail: {len(results['no_thumbnail']):,}") + print("=" * 70) + + # Write detailed reports + if results['corrupted']: + print(f"\n--- CORRUPTED FILES (< {format_size(SUSPICIOUS_SIZE)}) ---") + with open('corrupted_files.txt', 'w') as f: + for item in sorted(results['corrupted'], key=lambda x: x['disk_size']): + line = f"{item['disk_size_human']:>10} {item['filename']} ({item['path']})" + print(line) + f.write(f"{item['filename']}\t{item['disk_size']}\t{item['path']}\n") + print(f"\nList saved to: corrupted_files.txt") + + if results['missing']: + print(f"\n--- MISSING FILES ---") + with open('missing_from_disk.txt', 'w') as f: + for item in results['missing'][:20]: # Show first 20 + print(f" {item['filename']} ({item['path']})") + f.write(f"{item['filename']}\t{item['path']}\n") + if len(results['missing']) > 20: + print(f" ... and {len(results['missing']) - 20} more") + for item in results['missing'][20:]: + f.write(f"{item['filename']}\t{item['path']}\n") + print(f"\nList saved to: missing_from_disk.txt") + + if results['no_thumbnail']: + print(f"\n--- NO THUMBNAIL (first 20) ---") + with open('no_thumbnail.txt', 'w') as f: + for item in results['no_thumbnail'][:20]: + print(f" {item['disk_size_human']:>10} {item['filename']}") + f.write(f"{item['filename']}\t{item['disk_size']}\t{item['path']}\n") + if len(results['no_thumbnail']) > 20: + print(f" ... and {len(results['no_thumbnail']) - 20} more") + for item in results['no_thumbnail'][20:]: + f.write(f"{item['filename']}\t{item['disk_size']}\t{item['path']}\n") + print(f"\nList saved to: no_thumbnail.txt") + + +if __name__ == "__main__": + main() diff --git a/date_finder.py b/date_finder.py new file mode 100755 index 0000000..5b08481 --- /dev/null +++ b/date_finder.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python3 +"""Find probable date range for a photo by looking at neighboring filenames.""" + +import sqlite3 +import re +import sys + +DB_PATH = "/home/johan/immich-compare/immich_index.db" + +def get_neighbors(filename, conn): + """Find files with similar names and show their dates.""" + # Extract prefix and number from filename like IMG_0656.jpg + match = re.match(r'^([A-Za-z_]+)(\d+)', filename) + if not match: + return None + + prefix = match.group(1) + num = int(match.group(2)) + + # Search for nearby numbers (±20) + results = [] + c = conn.cursor() + + for offset in range(-20, 21): + test_num = num + offset + if test_num < 0: + continue + # Try different formats for the number + for fmt in [f"{test_num:04d}", f"{test_num:05d}", f"{test_num}"]: + pattern = f"{prefix}{fmt}%" + c.execute("""SELECT filename, date, id FROM assets + WHERE filename LIKE ? AND date != '2024-01-28' + ORDER BY filename""", (pattern,)) + for row in c.fetchall(): + if row not in results: + results.append(row) + + # Sort by filename + results.sort(key=lambda x: x[0]) + return results + +def show_context(filename): + conn = sqlite3.connect(DB_PATH) + + # Check if this file exists and its current date + c = conn.cursor() + c.execute("SELECT id, date FROM assets WHERE filename = ?", (filename,)) + current = c.fetchone() + + print(f"\n=== {filename} ===") + if current: + print(f"Current date: {current[1]} (ID: {current[0]})") + else: + print("Not found in Immich") + + neighbors = get_neighbors(filename, conn) + if neighbors: + print(f"\nNeighboring files with known dates:") + print("-" * 50) + dates = set() + for fname, date, fid in neighbors: + marker = " <--" if fname.upper().startswith(filename.upper().split('.')[0]) else "" + print(f" {fname:30} {date}{marker}") + if date != '2024-01-28': + dates.add(date) + + if dates: + dates = sorted(dates) + print("-" * 50) + print(f"Date range: {dates[0]} to {dates[-1]}") + else: + print("No neighboring files found") + + conn.close() + +if __name__ == "__main__": + if len(sys.argv) > 1: + show_context(sys.argv[1]) + else: + # Show all Jan 28, 2024 files that match IMG_xxxx pattern + conn = sqlite3.connect(DB_PATH) + c = conn.cursor() + c.execute("""SELECT filename FROM assets + WHERE date = '2024-01-28' + AND filename LIKE 'IMG_%' + ORDER BY filename""") + print("Jan 28, 2024 files needing dates:") + for row in c.fetchall(): + print(f" {row[0]}") + conn.close() diff --git a/delete_corrupted.py b/delete_corrupted.py new file mode 100644 index 0000000..076eadc --- /dev/null +++ b/delete_corrupted.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python3 +"""Delete corrupted versions of files from Immich after good versions were uploaded.""" + +import requests +import sys + +API_KEY = "GsWQUTR6EXlkKp1M82jDJ3KmzhM0fMAbbIbfHDyI" +BASE_URL = "http://localhost:2283/api" + +# Map of filename -> corrupted path from corrupted_files.txt +CORRUPTED_PATHS = { + "2010-04-03_14-07-26_406.mp4": "/data/library/admin/2010/04/2010-04-03_14-07-26_406+1.mp4", + "2010-04-03_14-07-52_756_Utrecht.mp4": "/data/library/admin/2010/04/2010-04-03_14-07-52_756_Utrecht+1.mp4", + "2010-04-04_16-02-21_184_Noordoostpolder.mp4": "/data/library/admin/2010/04/2010-04-04_16-02-21_184_Noordoostpolder+1.mp4", + "2010-04-04_16-02-44_615_Noordoostpolder.mp4": "/data/library/admin/2010/04/2010-04-04_16-02-44_615_Noordoostpolder+1.mp4", + "2010-04-16_17-22-35_167_Noordoostpolder.mp4": "/data/library/admin/2010/04/2010-04-16_17-22-35_167_Noordoostpolder+1.mp4", + "des (1).AVI": "/data/library/admin/2012/09/des (1).avi", + "des (11).AVI": "/data/library/admin/2012/09/des (11).avi", + "des (12).AVI": "/data/library/admin/2012/09/des (12).avi", + "des (13).AVI": "/data/library/admin/2012/09/des (13).avi", + "des (14).AVI": "/data/library/admin/2012/09/des (14).avi", + "des (15).AVI": "/data/library/admin/2012/09/des (15).avi", + "des (16).AVI": "/data/library/admin/2012/09/des (16).avi", + "des (2).AVI": "/data/library/admin/2012/09/des (2).avi", + "des (6).AVI": "/data/library/admin/2012/09/des (6).avi", + "des (7).AVI": "/data/library/admin/2012/09/des (7).avi", + "IMG_0024.MOV": "/data/library/admin/2010/11/IMG_0024+1.mov", + "IMG_0067.MOV": "/data/library/admin/2010/11/IMG_0067+1.mov", + "IMG_0146.MOV": "/data/library/admin/2011/06/IMG_0146+1.mov", + "IMG_0148.MOV": "/data/library/admin/2011/06/IMG_0148+1.mov", + "IMG_0149.MOV": "/data/library/admin/2011/06/IMG_0149+1.mov", + "IMG_0156.MOV": "/data/library/admin/2011/06/IMG_0156+1.mov", + "IMG_0157.MOV": "/data/library/admin/2011/06/IMG_0157+1.mov", + "IMG_0164.MOV": "/data/library/admin/2011/06/IMG_0164+1.mov", + "IMG_0165.MOV": "/data/library/admin/2011/06/IMG_0165+1.mov", + "IMG_0172.MOV": "/data/library/admin/2010/08/IMG_0172+1.mov", + "IMG_0178.MOV": "/data/library/admin/2010/08/IMG_0178+1.mov", + "IMG_0179.MOV": "/data/library/admin/2011/07/IMG_0179+1.mov", + "IMG_0182.MOV": "/data/library/admin/2010/08/IMG_0182+1.mov", + "IMG_0183.MOV": "/data/library/admin/2010/08/IMG_0183+1.mov", + "IMG_0184.MOV": "/data/library/admin/2010/08/IMG_0184+1.mov", + "IMG_0185.MOV": "/data/library/admin/2010/08/IMG_0185+1.mov", + "IMG_0187.MOV": "/data/library/admin/2010/08/IMG_0187+1.mov", + "IMG_0554.MOV": "/data/library/admin/2012/09/IMG_0554+1.mov", + "IMG_0555.MOV": "/data/library/admin/2012/09/IMG_0555+1.mov", + "IMG_0558.MOV": "/data/library/admin/2012/09/IMG_0558+1.mov", + "IMG_0581.MOV": "/data/library/admin/2012/11/IMG_0581+1.mov", + "IMG_0584.MOV": "/data/library/admin/2012/11/IMG_0584+1.mov", + "IMG_0586.MOV": "/data/library/admin/2012/11/IMG_0586+1.mov", + "IMG_0591.MOV": "/data/library/admin/2012/11/IMG_0591+1.mov", + "MVI_1077.AVI": "/data/library/admin/2012/09/MVI_1077.avi", + "MVI_1079.AVI": "/data/library/admin/2012/09/MVI_1079.avi", + "MVI_1080.AVI": "/data/library/admin/2012/09/MVI_1080.avi", + "MVI_1085.AVI": "/data/library/admin/2012/09/MVI_1085.avi", +} + +def search_file(filename): + """Search for a file by name.""" + resp = requests.post( + f"{BASE_URL}/search/metadata", + headers={"x-api-key": API_KEY, "Content-Type": "application/json"}, + json={"originalFileName": filename} + ) + if resp.status_code == 200: + data = resp.json() + return data.get("assets", {}).get("items", []) + return [] + +def delete_asset(asset_id, filename): + """Delete an asset by ID.""" + resp = requests.delete( + f"{BASE_URL}/assets", + headers={"x-api-key": API_KEY, "Content-Type": "application/json"}, + json={"ids": [asset_id], "force": True} + ) + if resp.status_code == 204: + print(f" DELETED: {filename}") + return True + else: + print(f" FAILED to delete {filename}: {resp.status_code} {resp.text}") + return False + +def main(): + deleted = 0 + not_found = 0 + errors = 0 + + for filename, corrupted_path in CORRUPTED_PATHS.items(): + print(f"\nProcessing: {filename}") + + # Search for all versions of this file + assets = search_file(filename) + + if not assets: + print(f" No assets found for {filename}") + not_found += 1 + continue + + print(f" Found {len(assets)} matches") + + # Find the one with the corrupted path (case-insensitive compare) + corrupted_path_lower = corrupted_path.lower() + found_corrupted = False + + for asset in assets: + asset_path = asset.get("originalPath", "") + print(f" - {asset['id'][:8]}... path={asset_path}") + + if asset_path.lower() == corrupted_path_lower: + print(f" Found corrupted version at: {asset_path}") + if delete_asset(asset["id"], filename): + deleted += 1 + found_corrupted = True + else: + errors += 1 + break + + if not found_corrupted and len(assets) > 0: + # Maybe already deleted or path doesn't match exactly + print(f" WARNING: Could not find corrupted path {corrupted_path}") + # Check if any has +1 in path (corrupted marker) + for asset in assets: + asset_path = asset.get("originalPath", "") + if "+1" in asset_path or "/2012/09/" in asset_path: + print(f" Found likely corrupted version at: {asset_path}") + if delete_asset(asset["id"], filename): + deleted += 1 + else: + errors += 1 + break + + print(f"\n\nSummary:") + print(f" Deleted: {deleted}") + print(f" Not found: {not_found}") + print(f" Errors: {errors}") + +if __name__ == "__main__": + main() diff --git a/delete_corrupted_plus1.py b/delete_corrupted_plus1.py new file mode 100644 index 0000000..8845532 --- /dev/null +++ b/delete_corrupted_plus1.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python3 +"""Delete corrupted +1 video files from Immich.""" + +import requests + +API_KEY = "GsWQUTR6EXlkKp1M82jDJ3KmzhM0fMAbbIbfHDyI" +BASE_URL = "http://localhost:2283/api" + +# Corrupted +1 files (path -> container path) +CORRUPTED_FILES = [ + "/data/library/admin/2010/04/2010-04-14_07-38-01_290_Heerenveen+1.mp4", + "/data/library/admin/2012/01/IMG_0259+1.mov", + "/data/library/admin/2012/01/IMG_0262+1.mov", + "/data/library/admin/2010/08/IMG_0169+1.mov", + "/data/library/admin/2012/08/MVI_0342+1.mov", + "/data/library/admin/2012/08/MVI_0496+1.mov", + "/data/library/admin/2012/08/MVI_0462+1.mov", + "/data/library/admin/2012/08/MVI_0452+1.mov", + "/data/library/admin/2012/08/MVI_0448+1.mov", + "/data/library/admin/2012/08/MVI_0463+1.mov", + "/data/library/admin/2012/08/MVI_0468+1.mov", + "/data/library/admin/2012/08/MVI_0534+1.mov", + "/data/library/admin/2012/08/MVI_0446+1.mov", + "/data/library/admin/2012/08/MVI_0466+1.mov", + "/data/library/admin/2012/08/MVI_0335+1.mov", + "/data/library/admin/2012/08/MVI_0495+1.mov", + "/data/library/admin/2012/08/MVI_0531+1.mov", + "/data/library/admin/2012/08/MVI_0404+1.mov", + "/data/library/admin/2012/08/MVI_0467+1.mov", + "/data/library/admin/2012/08/MVI_0454+1.mov", + "/data/library/admin/2012/08/MVI_0389+1.mov", + "/data/library/admin/2012/08/MVI_0469+1.mov", + "/data/library/admin/2011/06/IMG_0147+1.mov", + "/data/library/admin/2012/08/MVI_0465+1.mov", + "/data/library/admin/2012/08/MVI_0444+1.mov", + "/data/library/admin/2012/08/MVI_0470+1.mov", + "/data/library/admin/2012/08/MVI_0440+1.mov", + "/data/library/admin/2010/04/2010-04-04_16-06-22_374_Noordoostpolder+1.mp4", + "/data/library/admin/2012/06/MVI_0060+1.mov", + "/data/library/admin/2012/11/IMG_0582+1.mov", +] + +def get_all_assets(): + """Get all assets to find by path.""" + assets = [] + page = 1 + while True: + resp = requests.get( + f"{BASE_URL}/assets", + headers={"x-api-key": API_KEY}, + params={"page": page, "size": 1000} + ) + if resp.status_code != 200: + break + data = resp.json() + if not data: + break + assets.extend(data) + page += 1 + if len(data) < 1000: + break + return assets + +def delete_asset(asset_id, path): + resp = requests.delete( + f"{BASE_URL}/assets", + headers={"x-api-key": API_KEY, "Content-Type": "application/json"}, + json={"ids": [asset_id], "force": True} + ) + if resp.status_code == 204: + print(f"DELETED: {path}") + return True + else: + print(f"FAILED: {path} - {resp.status_code}") + return False + +print("Fetching all assets...") +assets = get_all_assets() +print(f"Found {len(assets)} assets") + +# Build path -> id map +path_to_id = {a["originalPath"]: a["id"] for a in assets} + +deleted = 0 +for path in CORRUPTED_FILES: + if path in path_to_id: + if delete_asset(path_to_id[path], path): + deleted += 1 + else: + print(f"NOT FOUND: {path}") + +print(f"\nTotal deleted: {deleted}") diff --git a/delete_corrupted_plus1_v2.py b/delete_corrupted_plus1_v2.py new file mode 100644 index 0000000..72b2bb8 --- /dev/null +++ b/delete_corrupted_plus1_v2.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python3 +"""Delete corrupted +1 video files from Immich.""" + +import requests + +API_KEY = "GsWQUTR6EXlkKp1M82jDJ3KmzhM0fMAbbIbfHDyI" +BASE_URL = "http://localhost:2283/api" + +# Corrupted files: (filename, corrupted path) +CORRUPTED_FILES = [ + ("2010-04-14_07-38-01_290_Heerenveen+1.mp4", "/data/library/admin/2010/04/2010-04-14_07-38-01_290_Heerenveen+1.mp4"), + ("IMG_0259+1.mov", "/data/library/admin/2012/01/IMG_0259+1.mov"), + ("IMG_0262+1.mov", "/data/library/admin/2012/01/IMG_0262+1.mov"), + ("IMG_0169+1.mov", "/data/library/admin/2010/08/IMG_0169+1.mov"), + ("MVI_0342+1.mov", "/data/library/admin/2012/08/MVI_0342+1.mov"), + ("MVI_0496+1.mov", "/data/library/admin/2012/08/MVI_0496+1.mov"), + ("MVI_0462+1.mov", "/data/library/admin/2012/08/MVI_0462+1.mov"), + ("MVI_0452+1.mov", "/data/library/admin/2012/08/MVI_0452+1.mov"), + ("MVI_0448+1.mov", "/data/library/admin/2012/08/MVI_0448+1.mov"), + ("MVI_0463+1.mov", "/data/library/admin/2012/08/MVI_0463+1.mov"), + ("MVI_0468+1.mov", "/data/library/admin/2012/08/MVI_0468+1.mov"), + ("MVI_0534+1.mov", "/data/library/admin/2012/08/MVI_0534+1.mov"), + ("MVI_0446+1.mov", "/data/library/admin/2012/08/MVI_0446+1.mov"), + ("MVI_0466+1.mov", "/data/library/admin/2012/08/MVI_0466+1.mov"), + ("MVI_0335+1.mov", "/data/library/admin/2012/08/MVI_0335+1.mov"), + ("MVI_0495+1.mov", "/data/library/admin/2012/08/MVI_0495+1.mov"), + ("MVI_0531+1.mov", "/data/library/admin/2012/08/MVI_0531+1.mov"), + ("MVI_0404+1.mov", "/data/library/admin/2012/08/MVI_0404+1.mov"), + ("MVI_0467+1.mov", "/data/library/admin/2012/08/MVI_0467+1.mov"), + ("MVI_0454+1.mov", "/data/library/admin/2012/08/MVI_0454+1.mov"), + ("MVI_0389+1.mov", "/data/library/admin/2012/08/MVI_0389+1.mov"), + ("MVI_0469+1.mov", "/data/library/admin/2012/08/MVI_0469+1.mov"), + ("IMG_0147+1.mov", "/data/library/admin/2011/06/IMG_0147+1.mov"), + ("MVI_0465+1.mov", "/data/library/admin/2012/08/MVI_0465+1.mov"), + ("MVI_0444+1.mov", "/data/library/admin/2012/08/MVI_0444+1.mov"), + ("MVI_0470+1.mov", "/data/library/admin/2012/08/MVI_0470+1.mov"), + ("MVI_0440+1.mov", "/data/library/admin/2012/08/MVI_0440+1.mov"), + ("2010-04-04_16-06-22_374_Noordoostpolder+1.mp4", "/data/library/admin/2010/04/2010-04-04_16-06-22_374_Noordoostpolder+1.mp4"), + ("MVI_0060+1.mov", "/data/library/admin/2012/06/MVI_0060+1.mov"), + ("IMG_0582+1.mov", "/data/library/admin/2012/11/IMG_0582+1.mov"), +] + +def search_file(filename): + resp = requests.post( + f"{BASE_URL}/search/metadata", + headers={"x-api-key": API_KEY, "Content-Type": "application/json"}, + json={"originalFileName": filename} + ) + if resp.status_code == 200: + return resp.json().get("assets", {}).get("items", []) + return [] + +def delete_asset(asset_id, path): + resp = requests.delete( + f"{BASE_URL}/assets", + headers={"x-api-key": API_KEY, "Content-Type": "application/json"}, + json={"ids": [asset_id], "force": True} + ) + if resp.status_code == 204: + print(f"DELETED: {path}") + return True + else: + print(f"FAILED: {path} - {resp.status_code}") + return False + +deleted = 0 +not_found = 0 + +for filename, expected_path in CORRUPTED_FILES: + # Search for this filename + # The filename in Immich might not have +1 in the originalFileName + # Try searching without the +1 + base_name = filename.replace("+1", "") + assets = search_file(base_name) + + found = False + for asset in assets: + if asset["originalPath"] == expected_path: + if delete_asset(asset["id"], expected_path): + deleted += 1 + found = True + break + + if not found: + # Try with +1 in filename + assets = search_file(filename) + for asset in assets: + if asset["originalPath"] == expected_path: + if delete_asset(asset["id"], expected_path): + deleted += 1 + found = True + break + + if not found: + print(f"NOT FOUND: {filename} at {expected_path}") + not_found += 1 + +print(f"\nTotal deleted: {deleted}") +print(f"Not found: {not_found}") diff --git a/delete_sep10_corrupted.py b/delete_sep10_corrupted.py new file mode 100644 index 0000000..ae987d1 --- /dev/null +++ b/delete_sep10_corrupted.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python3 +"""Delete all corrupted files from 2012/09 folder in Immich.""" + +import requests + +API_KEY = "GsWQUTR6EXlkKp1M82jDJ3KmzhM0fMAbbIbfHDyI" +BASE_URL = "http://localhost:2283/api" + +# Files to delete (from 2012/09 folder) +CORRUPTED_FILES = [ + "CIMG0057.avi", "MVI_0302.avi", "CIMG0066.avi", "MVI_0313.avi", + "MVI_0928.avi", "MVI_0872.avi", "MVI_0927.avi", "MVI_0174.avi", + "MVI_0312.avi", "MVI_1334.avi", "MVI_1253.avi", "CIMG0017.avi", + "MVI_0150.avi", "MVI_1333.avi", "MOV01161.avi", "MOV01027.avi", + "MOV00421.avi", "MVI_0068.avi", "MVI_0232.avi", "CIMG0067.avi", + "MOV00743.avi", "MOV00757.avi", "MOV00083.avi", "MVI_0314.avi", + "MVI_0088.avi", "MVI_0521.avi", "MVI_0074.avi", "MVI_0904.avi", + "MOV01159.avi", "MVI_0656.avi", "MVI_0087.avi", "MOV00689.avi", + "MVI_1502.avi", "MVI_1271.avi", "MOV00884.avi", "MVI_1473.avi", + "MVI_0192.avi", "MVI_0903.avi", "MVI_0190.avi", "MVI_0658.avi", + "MOV01026.avi", "DSCN0848.avi", "MVI_0657.avi", "MVI_0665.avi", + "MVI_0067.avi", "MVI_0191.avi", "MVI_0315.avi", "MVI_0193.avi", + "MOV00883.avi" +] + +def search_file(filename): + resp = requests.post( + f"{BASE_URL}/search/metadata", + headers={"x-api-key": API_KEY, "Content-Type": "application/json"}, + json={"originalFileName": filename} + ) + if resp.status_code == 200: + return resp.json().get("assets", {}).get("items", []) + return [] + +def delete_asset(asset_id): + resp = requests.delete( + f"{BASE_URL}/assets", + headers={"x-api-key": API_KEY, "Content-Type": "application/json"}, + json={"ids": [asset_id], "force": True} + ) + return resp.status_code == 204 + +deleted = 0 +for filename in CORRUPTED_FILES: + assets = search_file(filename) + for asset in assets: + path = asset.get("originalPath", "") + if "/2012/09/" in path: + if delete_asset(asset["id"]): + print(f"DELETED: {filename} from {path}") + deleted += 1 + else: + print(f"FAILED: {filename}") + +print(f"\nTotal deleted: {deleted}") diff --git a/fix_mvi04_dates.py b/fix_mvi04_dates.py new file mode 100644 index 0000000..dca18d1 --- /dev/null +++ b/fix_mvi04_dates.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python3 +import subprocess, json +from urllib.request import Request, urlopen + +API_KEY = "GsWQUTR6EXlkKp1M82jDJ3KmzhM0fMAbbIbfHDyI" +API_URL = "http://localhost:2283/api" + +# Correct date: Aug 24, 2012 +correct_date = "2012-08-24T12:00:00.000Z" + +files = [ + "MVI_0440.jpg", "MVI_0444.jpg", "MVI_0446.jpg", "MVI_0448.jpg", + "MVI_0454.jpg", "MVI_0462.jpg", "MVI_0463.jpg", "MVI_0465.jpg", + "MVI_0466.jpg", "MVI_0467.jpg", "MVI_0468.jpg", "MVI_0469.jpg", + "MVI_0470.jpg", "MVI_0495.jpg", +] + +for fname in files: + # Find in 2015/02 where they were moved + result = subprocess.run([ + "docker", "exec", "immich_postgres", "psql", "-U", "postgres", "-d", "immich", "-t", "-c", + f"SELECT id FROM asset WHERE \"originalFileName\" = '{fname}' AND \"deletedAt\" IS NULL;" + ], capture_output=True, text=True) + asset_id = result.stdout.strip() + + if asset_id: + data = json.dumps({"dateTimeOriginal": correct_date}).encode() + req = Request(f"{API_URL}/assets/{asset_id}", data=data, method="PUT", + headers={"x-api-key": API_KEY, "Content-Type": "application/json"}) + urlopen(req) + print(f"Fixed {fname} -> 2012-08-24") + else: + print(f"NOT FOUND: {fname}") diff --git a/immich-compare.py b/immich-compare.py new file mode 100755 index 0000000..5b89596 --- /dev/null +++ b/immich-compare.py @@ -0,0 +1,241 @@ +#!/usr/bin/env python3 +""" +Photo Deduplication Tool: Compare old Windows drive against Immich library. + +Identifies photos/videos on the old drive (via SSH to Mac Mini) that are NOT +already in Immich, using SHA-1 checksum comparison. +""" + +import argparse +import subprocess +import sys +from collections import defaultdict +from pathlib import Path + +# Configuration +IMMICH_DB_CONTAINER = "immich_postgres" +IMMICH_DB_NAME = "immich" +IMMICH_DB_USER = "postgres" + +MAC_MINI_HOST = "macmini" +OLD_DRIVE_PATH = "/Volumes/Untitled/Users/Johan/Mylio/" + +# File extensions to check (lowercase) +MEDIA_EXTENSIONS = { + '.jpg', '.jpeg', '.png', '.heic', '.heif', + '.mov', '.mp4', '.avi', '.gif', '.m4v', '.mkv', '.webp', + '.tiff', '.tif', '.bmp', '.raw', '.cr2', '.nef', '.arw', '.dng', + '.3gp', '.mts', '.webm' +} + +# Directories to skip (Mylio-generated content, not actual photos) +SKIP_DIRECTORIES = { + 'Generated Images', +} + + +def get_immich_checksums() -> set[str]: + """Export all SHA-1 checksums from Immich's PostgreSQL database.""" + print("Fetching checksums from Immich database...") + + query = "SELECT encode(checksum, 'hex') FROM asset WHERE \"deletedAt\" IS NULL AND checksum IS NOT NULL;" + + cmd = [ + "docker", "exec", IMMICH_DB_CONTAINER, + "psql", "-U", IMMICH_DB_USER, "-d", IMMICH_DB_NAME, + "-t", "-A", "-c", query + ] + + try: + result = subprocess.run(cmd, capture_output=True, text=True, check=True) + checksums = set(line.strip().lower() for line in result.stdout.strip().split('\n') if line.strip()) + print(f" Found {len(checksums):,} checksums in Immich") + return checksums + except subprocess.CalledProcessError as e: + print(f"Error querying Immich database: {e}", file=sys.stderr) + print(f"stderr: {e.stderr}", file=sys.stderr) + sys.exit(1) + + +def build_remote_scan_script(include_generated: bool = False) -> str: + """Build the shell script to run on Mac Mini for scanning files.""" + extensions_pattern = " -o ".join(f'-iname "*.{ext.lstrip(".")}"' for ext in MEDIA_EXTENSIONS) + + # Build prune patterns for directories to skip + if include_generated: + prune_pattern = "" + else: + prune_dirs = " -o ".join(f'-name "{d}"' for d in SKIP_DIRECTORIES) + prune_pattern = f'\\( {prune_dirs} \\) -prune -o' + + script = f''' +set -e +cd "{OLD_DRIVE_PATH}" 2>/dev/null || {{ echo "ERROR: Cannot access {OLD_DRIVE_PATH}" >&2; exit 1; }} + +# Find all media files and calculate SHA-1 +find . {prune_pattern} -type f \\( {extensions_pattern} \\) -print0 2>/dev/null | while IFS= read -r -d '' file; do + # Calculate SHA-1 checksum + checksum=$(shasum -a 1 "$file" 2>/dev/null | cut -d' ' -f1) + if [ -n "$checksum" ]; then + echo "$checksum $file" + fi +done +''' + return script + + +def scan_old_drive(checksums: set[str], include_generated: bool = False) -> tuple[list[str], int, int, int]: + """ + SSH to Mac Mini and scan the old drive, comparing against Immich checksums. + + Returns: (missing_files, checked_count, found_count, skipped_count) + """ + print(f"\nScanning old drive via SSH to {MAC_MINI_HOST}...") + print(f" Path: {OLD_DRIVE_PATH}") + if not include_generated: + print(f" Skipping directories: {', '.join(SKIP_DIRECTORIES)}") + + script = build_remote_scan_script(include_generated) + + cmd = ["ssh", MAC_MINI_HOST, "bash -s"] + + missing_files = [] + checked_count = 0 + found_count = 0 + skipped_count = 0 + + try: + process = subprocess.Popen( + cmd, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True + ) + + # Send script to remote bash + stdout, stderr = process.communicate(input=script, timeout=7200) # 2 hour timeout + + if process.returncode != 0: + print(f"Error scanning remote drive: {stderr}", file=sys.stderr) + sys.exit(1) + + # Process results + for line in stdout.strip().split('\n'): + if not line.strip(): + continue + + parts = line.split(' ', 1) + if len(parts) != 2: + continue + + checksum, filepath = parts + checksum = checksum.lower().strip() + filepath = filepath.strip() + + checked_count += 1 + + if checksum in checksums: + found_count += 1 + else: + # Reconstruct full path + full_path = str(Path(OLD_DRIVE_PATH) / filepath.lstrip('./')) + missing_files.append(full_path) + + # Progress update every 1000 files + if checked_count % 1000 == 0: + print(f" Processed {checked_count:,} files... ({found_count:,} in Immich, {len(missing_files):,} missing)") + + return missing_files, checked_count, found_count, skipped_count + + except subprocess.TimeoutExpired: + print("Error: Remote scan timed out after 2 hours", file=sys.stderr) + process.kill() + sys.exit(1) + except Exception as e: + print(f"Error during remote scan: {e}", file=sys.stderr) + sys.exit(1) + + +def analyze_missing_files(missing_files: list[str]) -> dict[str, list[str]]: + """Group missing files by their parent directory for better analysis.""" + by_folder = defaultdict(list) + for filepath in missing_files: + # Get the immediate parent folder name relative to Mylio root + rel_path = filepath.replace(OLD_DRIVE_PATH, '') + parts = rel_path.split('/') + if len(parts) >= 2: + folder = '/'.join(parts[:2]) # e.g., "Mylio/2020" or "Apple Photos" + else: + folder = parts[0] if parts else "root" + by_folder[folder].append(filepath) + return dict(by_folder) + + +def write_report(missing_files: list[str], output_file: Path): + """Write the list of missing files to a text file.""" + with open(output_file, 'w') as f: + for filepath in sorted(missing_files): + f.write(f"{filepath}\n") + print(f" Full list written to: {output_file}") + + +def main(): + parser = argparse.ArgumentParser( + description="Compare photos/videos on old drive against Immich library" + ) + parser.add_argument( + "--output", "-o", + type=Path, + default=Path("missing_files.txt"), + help="Output file for missing files list (default: missing_files.txt)" + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="Only fetch Immich checksums, don't scan remote drive" + ) + parser.add_argument( + "--include-generated", + action="store_true", + help="Include 'Generated Images' folder (Mylio thumbnails/previews)" + ) + args = parser.parse_args() + + # Step 1: Get Immich checksums + immich_checksums = get_immich_checksums() + + if args.dry_run: + print("\n[DRY RUN] Skipping remote scan") + return + + # Step 2: Scan old drive and compare + missing_files, checked_count, found_count, _ = scan_old_drive( + immich_checksums, + include_generated=args.include_generated + ) + + # Step 3: Report results + print("\n" + "=" * 60) + print("RESULTS") + print("=" * 60) + print(f"Total files checked: {checked_count:,}") + print(f"Already in Immich: {found_count:,}") + print(f"NOT in Immich: {len(missing_files):,}") + print("=" * 60) + + if missing_files: + # Analyze by folder + by_folder = analyze_missing_files(missing_files) + print("\nBreakdown by folder:") + for folder in sorted(by_folder.keys()): + print(f" {folder}: {len(by_folder[folder]):,} files") + + write_report(missing_files, args.output) + print(f"\nReview {args.output} to see files that need to be imported.") + else: + print("\nAll files from the old drive are already in Immich!") + + +if __name__ == "__main__": + main() diff --git a/preview-tool/app.py b/preview-tool/app.py new file mode 100644 index 0000000..d5a09b2 --- /dev/null +++ b/preview-tool/app.py @@ -0,0 +1,337 @@ +#!/usr/bin/env python3 +"""Visual comparison tool for Jan 28, 2024 photos - no dependencies.""" + +from http.server import HTTPServer, BaseHTTPRequestHandler +import os +import subprocess +import glob +import urllib.parse + +LIBRARY = "/tank/immich/library/library/admin" +JAN2024 = f"{LIBRARY}/2024/01" + +def get_sample_files_from_immich(limit=10): + """Get IMG files from 2024/01 that are actually in Immich.""" + import json + from urllib.request import Request, urlopen + + files = [] + try: + # Query Immich for all assets, then filter + # Use search with a broad date range for 2024/01 + search_data = json.dumps({ + "takenAfter": "2024-01-01T00:00:00.000Z", + "takenBefore": "2024-02-01T00:00:00.000Z", + "size": 200 + }).encode() + req = Request(f"{API_URL}/search/metadata", data=search_data, + headers={"x-api-key": API_KEY, "Content-Type": "application/json"}) + resp = urlopen(req) + data = json.loads(resp.read()) + + for item in data.get("assets", {}).get("items", []): + path = item.get("originalPath", "") + fname = path.split("/")[-1] + if "/2024/01/" in path and fname.lower().startswith("img_") and fname.lower().endswith((".jpg", ".jpeg")): + files.append({ + "filename": fname, + "immich_id": item["id"], + "path": path + }) + if len(files) >= limit: + break + except Exception as e: + print(f"Error: {e}") + + return files + +def find_matches(filename): + """Find all files with same name in other folders.""" + base = filename.rsplit('.', 1)[0].lower() + # Only strip _1024 suffix (Mylio resize indicator), keep +1 and (1) as they indicate different photos + base_clean = base.replace('_1024', '') + + matches = [] + for root, dirs, files in os.walk(LIBRARY): + if "/2024/01" in root: + continue + for f in files: + f_base = f.rsplit('.', 1)[0].lower() + f_clean = f_base.replace('_1024', '') + if f_clean == base_clean and f.lower().endswith(('.jpg', '.jpeg')): + matches.append(os.path.join(root, f)) + return matches + +API_KEY = "GsWQUTR6EXlkKp1M82jDJ3KmzhM0fMAbbIbfHDyI" +API_URL = "http://localhost:2283/api" + +def get_immich_id(filename): + """Get Immich asset ID for a file in /2024/01/.""" + import json + from urllib.request import Request, urlopen + + # Try both cases and without extension + base = filename.rsplit('.', 1)[0] + searches = [filename, filename.upper(), f"{base}.JPG", f"{base}.jpg"] + + for search_name in searches: + try: + search_data = json.dumps({"originalFileName": search_name}).encode() + req = Request(f"{API_URL}/search/metadata", data=search_data, + headers={"x-api-key": API_KEY, "Content-Type": "application/json"}) + resp = urlopen(req) + data = json.loads(resp.read()) + for item in data.get("assets", {}).get("items", []): + if "/2024/01/" in item.get("originalPath", ""): + return item["id"] + except: + pass + return None + +def get_file_info(filepath): + stat = os.stat(filepath) + size_kb = stat.st_size / 1024 + + try: + result = subprocess.run( + ['exiftool', '-s', '-s', '-s', '-ImageSize', filepath], + capture_output=True, text=True, timeout=5 + ) + dims = result.stdout.strip() or "unknown" + except: + dims = "unknown" + + try: + result = subprocess.run( + ['exiftool', '-s', '-s', '-s', '-DateTimeOriginal', filepath], + capture_output=True, text=True, timeout=5 + ) + date = result.stdout.strip() or "No EXIF date" + except: + date = "unknown" + + rel_path = filepath.replace(LIBRARY + "/", "") + folder = '/'.join(rel_path.split('/')[:-1]) + filename = filepath.split('/')[-1] + + return { + 'path': filepath, + 'rel_path': rel_path, + 'size_kb': round(size_kb), + 'size_mb': round(size_kb/1024, 2), + 'dims': dims, + 'date': date, + 'folder': folder, + 'immich_id': get_immich_id(filename) + } + +def generate_html(): + sample_files = get_sample_files_from_immich(10) + + rows = [] + for file_data in sample_files: + filename = file_data["filename"] + immich_id = file_data["immich_id"] + filepath = f"{JAN2024}/{filename}" + + if not os.path.exists(filepath): + continue + + info = get_file_info(filepath) + info['immich_id'] = immich_id # Use the ID we already have + matches = find_matches(filename) + match_infos = [get_file_info(m) for m in sorted(matches)] + + match_html = "" + if match_infos: + for m in match_infos: + match_filename = m['path'].split('/')[-1] + match_html += f''' +
+ +
+ {match_filename}
+ {m['folder']}
+ {m['size_kb']} KB ({m['size_mb']} MB)
+ {m['dims']}
+ {m['date']} +
+
''' + else: + match_html = '
No matching files found - KEEP THIS!
' + + rows.append(f''' +
+
+
+ +
+ {filename}
+ {info['size_kb']} KB ({info['size_mb']} MB)
+ {info['dims']}
+ {info['date']} +
+ +
+
+
{match_html}
+
''') + + return f''' + + + Photo Comparison - Jan 28, 2024 + + + +

Photo Comparison: Jan 28, 2024

+

Left: Files from /2024/01/ | Right: Matching files in other folders

+

Green border = has matches (safe to delete)

+

Red border = NO matches (keep!)

+ {''.join(rows)} + + +''' + +def delete_from_immich(asset_id): + """Delete asset from Immich by ID.""" + import json + from urllib.request import Request, urlopen + from urllib.error import HTTPError + + try: + del_data = json.dumps({"ids": [asset_id], "force": True}).encode() + del_req = Request(f"{API_URL}/assets", data=del_data, method="DELETE", + headers={"x-api-key": API_KEY, "Content-Type": "application/json"}) + urlopen(del_req) + return True, "Deleted" + except HTTPError as e: + return False, str(e) + +class Handler(BaseHTTPRequestHandler): + def do_GET(self): + path = urllib.parse.unquote(self.path) + + if path == '/': + html = generate_html() + self.send_response(200) + self.send_header('Content-type', 'text/html') + self.end_headers() + self.wfile.write(html.encode()) + elif path.startswith('/image/'): + img_path = LIBRARY + "/" + path[7:] + if os.path.exists(img_path): + self.send_response(200) + self.send_header('Content-type', 'image/jpeg') + self.end_headers() + with open(img_path, 'rb') as f: + self.wfile.write(f.read()) + else: + self.send_response(404) + self.end_headers() + else: + self.send_response(404) + self.end_headers() + + def do_POST(self): + path = urllib.parse.unquote(self.path) + if path.startswith('/delete/'): + asset_id = path[8:] + success, msg = delete_from_immich(asset_id) + self.send_response(200) + self.send_header('Content-type', 'application/json') + self.end_headers() + import json + self.wfile.write(json.dumps({"success": success, "error": msg if not success else None}).encode()) + else: + self.send_response(404) + self.end_headers() + + def log_message(self, format, *args): + pass # Suppress logging + +if __name__ == '__main__': + port = 5000 + print(f"Starting server at http://localhost:{port}") + print("Press Ctrl+C to stop") + HTTPServer(('0.0.0.0', port), Handler).serve_forever() diff --git a/preview-tool/grid.py b/preview-tool/grid.py new file mode 100644 index 0000000..07fcd2f --- /dev/null +++ b/preview-tool/grid.py @@ -0,0 +1,189 @@ +#!/usr/bin/env python3 +"""Grid view of Jan 28, 2024 images for quick date assignment.""" + +from http.server import HTTPServer, BaseHTTPRequestHandler +import os +import json +import subprocess +import urllib.parse + +LIBRARY = "/tank/immich/library/library/admin" +JAN2024 = f"{LIBRARY}/2024/01" +API_KEY = "GsWQUTR6EXlkKp1M82jDJ3KmzhM0fMAbbIbfHDyI" +API_URL = "http://localhost:2283/api" + +def get_images_from_db(limit=500): + """Get images from Immich database.""" + result = subprocess.run([ + "docker", "exec", "immich_postgres", "psql", "-U", "postgres", "-d", "immich", "-t", "-A", "-F", "|", "-c", + f"""SELECT id, "originalFileName", "originalPath" + FROM asset + WHERE DATE("fileCreatedAt") = '2024-01-28' + AND "deletedAt" IS NULL + AND "type" = 'IMAGE' + ORDER BY "originalFileName" + LIMIT {limit};""" + ], capture_output=True, text=True) + + images = [] + for line in result.stdout.strip().split('\n'): + if '|' in line: + parts = line.split('|') + if len(parts) >= 3: + images.append({ + 'id': parts[0], + 'filename': parts[1], + 'path': parts[2].replace('/data/library/admin/', '') + }) + return images + +def generate_html(limit=20): + images = get_images_from_db(limit) + + cards = [] + for img in images: + cards.append(f''' +
+ +
{img['filename']}
+
{img['id']}
+
''') + + return f''' + + + Jan 28, 2024 - Grid View + + + +

Jan 28, 2024 Images ({len(images)} shown)

+

Click images to select. Selected IDs appear below for copying.

+ +
+ {''.join(cards)} +
+ +
+ + + + + 0 selected +
Click images to select, then copy IDs or filenames
+
+ + + +''' + +class Handler(BaseHTTPRequestHandler): + def do_GET(self): + path = urllib.parse.unquote(self.path) + + if path == '/' or path.startswith('/?'): + # Parse limit from query string + limit = 500 + if '?' in path: + params = urllib.parse.parse_qs(path.split('?')[1]) + limit = int(params.get('limit', [500])[0]) + + html = generate_html(limit) + self.send_response(200) + self.send_header('Content-type', 'text/html; charset=utf-8') + self.end_headers() + self.wfile.write(html.encode()) + elif path.startswith('/image/'): + img_path = LIBRARY + "/" + path[7:] + if os.path.exists(img_path): + self.send_response(200) + ext = img_path.lower().split('.')[-1] + ctype = 'image/jpeg' if ext in ['jpg', 'jpeg'] else f'image/{ext}' + self.send_header('Content-type', ctype) + self.end_headers() + with open(img_path, 'rb') as f: + self.wfile.write(f.read()) + else: + self.send_response(404) + self.end_headers() + else: + self.send_response(404) + self.end_headers() + + def log_message(self, format, *args): + pass + +if __name__ == '__main__': + port = 5001 + print(f"Grid view at http://localhost:{port}") + print(f"Add ?limit=100 for more images") + HTTPServer(('0.0.0.0', port), Handler).serve_forever() diff --git a/scan_4tb_mylio.py b/scan_4tb_mylio.py new file mode 100644 index 0000000..05a8e83 --- /dev/null +++ b/scan_4tb_mylio.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python3 +"""Scan 4TB Mylio folder and find files not in Immich.""" + +import subprocess +import hashlib +import sqlite3 +import os + +REMOTE = "macmini" +REMOTE_PATH = "/Volumes/4TB/Mylio" +LOCAL_DB = "/home/johan/immich-compare/hash_index.db" +EXTENSIONS = {'.jpg', '.jpeg', '.png', '.heic', '.gif', '.mp4', '.mov', '.avi', '.m4v', '.3gp'} + +def get_immich_hashes(): + """Load all Immich hashes from existing database.""" + conn = sqlite3.connect(LOCAL_DB) + c = conn.cursor() + hashes = set() + for row in c.execute("SELECT hash64k FROM files WHERE source='immich'"): + hashes.add(row[0]) + conn.close() + print(f"Loaded {len(hashes)} Immich hashes") + return hashes + +def scan_remote(): + """Find all media files on remote 4TB Mylio.""" + print(f"\nFinding files on {REMOTE}:{REMOTE_PATH}...") + cmd = f"ssh {REMOTE} \"find '{REMOTE_PATH}' -type f 2>/dev/null\"" + result = subprocess.run(cmd, shell=True, capture_output=True, text=True) + + files = [] + for line in result.stdout.strip().split('\n'): + if not line: + continue + ext = os.path.splitext(line)[1].lower() + if ext in EXTENSIONS: + files.append(line) + + print(f"Found {len(files)} media files") + return files + +def hash_remote_file(filepath): + """Get 64KB hash of remote file.""" + cmd = f"ssh {REMOTE} \"head -c 65536 '{filepath}' 2>/dev/null\" | md5sum" + result = subprocess.run(cmd, shell=True, capture_output=True, text=True) + if result.returncode == 0: + return result.stdout.split()[0] + return None + +def main(): + immich_hashes = get_immich_hashes() + files = scan_remote() + + missing = [] + matched = 0 + + print(f"\nHashing files and comparing...") + for i, filepath in enumerate(files): + if (i + 1) % 100 == 0: + print(f" {i+1}/{len(files)} - {matched} matched, {len(missing)} missing") + + h = hash_remote_file(filepath) + if h is None: + continue + + if h in immich_hashes: + matched += 1 + else: + missing.append(filepath) + + print(f"\n{'='*50}") + print(f"Results:") + print(f" Total scanned: {len(files)}") + print(f" Already in Immich: {matched}") + print(f" NOT in Immich: {len(missing)}") + + # Save missing list + with open('/tmp/4tb_mylio_missing.txt', 'w') as f: + for p in missing: + f.write(p + '\n') + + print(f"\nMissing files saved to /tmp/4tb_mylio_missing.txt") + return missing + +if __name__ == "__main__": + main() diff --git a/update_aut3_dates.py b/update_aut3_dates.py new file mode 100644 index 0000000..29d1f38 --- /dev/null +++ b/update_aut3_dates.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python3 +import subprocess, json +from urllib.request import Request, urlopen + +API_KEY = "GsWQUTR6EXlkKp1M82jDJ3KmzhM0fMAbbIbfHDyI" +API_URL = "http://localhost:2283/api" + +files = { + "AUT_3127.jpg": "2004-03-06T00:09:40.000Z", + "AUT_3128.jpg": "2004-03-06T00:09:38.000Z", + "AUT_3143.jpg": "2004-03-06T00:09:22.000Z", + "AUT_3260.jpg": "2007-08-20T21:46:09.000Z", + "AUT_3261.jpg": "2007-08-20T21:46:10.000Z", + "AUT_3263.jpg": "2007-08-20T21:46:19.000Z", + "AUT_3264.jpg": "2007-08-20T21:45:51.000Z", + "AUT_3265.jpg": "2007-08-20T21:45:53.000Z", + "AUT_3267.jpg": "2007-08-20T21:45:55.000Z", + "AUT_3269.jpg": "2007-08-20T21:45:56.000Z", + "AUT_3273.jpg": "2007-08-20T21:46:20.000Z", + "AUT_3274.jpg": "2007-08-20T21:46:22.000Z", + "AUT_3976.jpg": "2008-09-15T13:01:38.000Z", + "AUT_3977.jpg": "2008-09-15T13:01:39.000Z", + "AUT_3978.jpg": "2008-09-15T13:01:41.000Z", + "AUT_3990.jpg": "2008-09-15T13:01:58.000Z", +} + +for fname, date in files.items(): + result = subprocess.run([ + "docker", "exec", "immich_postgres", "psql", "-U", "postgres", "-d", "immich", "-t", "-c", + f"SELECT id FROM asset WHERE \"originalPath\" LIKE '%/2024/01/{fname}' AND \"deletedAt\" IS NULL;" + ], capture_output=True, text=True) + asset_id = result.stdout.strip() + + if asset_id: + data = json.dumps({"dateTimeOriginal": date}).encode() + req = Request(f"{API_URL}/assets/{asset_id}", data=data, method="PUT", + headers={"x-api-key": API_KEY, "Content-Type": "application/json"}) + urlopen(req) + print(f"Updated {fname} -> {date[:10]}") + else: + print(f"NOT FOUND: {fname}") diff --git a/update_img8_dates.py b/update_img8_dates.py new file mode 100644 index 0000000..8bdd9d6 --- /dev/null +++ b/update_img8_dates.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 +"""Update dates for IMG_8xxx files in Immich.""" + +import subprocess +import json +from urllib.request import Request, urlopen + +API_KEY = "GsWQUTR6EXlkKp1M82jDJ3KmzhM0fMAbbIbfHDyI" +API_URL = "http://localhost:2283/api" + +files = { + "IMG_8007.jpg": "2010-09-20T12:16:54.000Z", + "IMG_8027.jpg": "2010-09-20T12:16:54.000Z", + "IMG_8028.jpg": "2010-09-20T12:17:00.000Z", + "IMG_8030.jpg": "2010-09-20T12:17:00.000Z", + "IMG_8625.jpg": "2009-08-28T13:48:22.000Z", + "IMG_8627.jpg": "2009-08-28T13:48:22.000Z", + "IMG_8629.jpg": "2009-08-28T13:48:22.000Z", + "IMG_8630.jpg": "2009-08-28T13:48:22.000Z", + "IMG_8631.jpg": "2009-08-28T13:48:22.000Z", + "IMG_8633.jpg": "2009-08-28T13:48:22.000Z", + "IMG_8634.jpg": "2009-08-28T13:48:22.000Z", + "IMG_8635.jpg": "2009-08-28T13:48:24.000Z", + "IMG_8636.jpg": "2009-08-28T13:48:24.000Z", + "IMG_8638.jpg": "2009-08-28T13:48:24.000Z", +} + +def get_asset_id(filename): + result = subprocess.run([ + "docker", "exec", "immich_postgres", "psql", "-U", "postgres", "-d", "immich", "-t", "-c", + f"SELECT id FROM asset WHERE \"originalPath\" LIKE '%/2024/01/{filename}' AND \"deletedAt\" IS NULL;" + ], capture_output=True, text=True) + return result.stdout.strip() + +def update_date(asset_id, date): + data = json.dumps({"dateTimeOriginal": date}).encode() + req = Request(f"{API_URL}/assets/{asset_id}", data=data, method="PUT", + headers={"x-api-key": API_KEY, "Content-Type": "application/json"}) + urlopen(req) + +updated = 0 +for fname, date in files.items(): + asset_id = get_asset_id(fname) + if asset_id: + update_date(asset_id, date) + print(f"Updated {fname} -> {date[:10]}") + updated += 1 + else: + print(f"NOT FOUND: {fname}") + +print(f"\nTotal updated: {updated}") diff --git a/update_mvi04_dates.py b/update_mvi04_dates.py new file mode 100644 index 0000000..2254ea6 --- /dev/null +++ b/update_mvi04_dates.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python3 +import subprocess, json +from urllib.request import Request, urlopen + +API_KEY = "GsWQUTR6EXlkKp1M82jDJ3KmzhM0fMAbbIbfHDyI" +API_URL = "http://localhost:2283/api" + +files = { + "MVI_0440.jpg": "2015-02-07T02:06:37.000Z", + "MVI_0444.jpg": "2015-02-07T01:53:45.000Z", + "MVI_0446.jpg": "2015-02-07T02:03:32.000Z", + "MVI_0448.jpg": "2015-02-07T01:57:12.000Z", + "MVI_0454.jpg": "2015-02-07T01:53:36.000Z", + "MVI_0462.jpg": "2015-02-07T02:00:27.000Z", + "MVI_0463.jpg": "2015-02-07T01:57:01.000Z", + "MVI_0465.jpg": "2015-02-07T02:06:22.000Z", + "MVI_0466.jpg": "2015-02-07T02:03:18.000Z", + "MVI_0467.jpg": "2015-02-07T02:00:18.000Z", + "MVI_0468.jpg": "2015-02-07T01:56:56.000Z", + "MVI_0469.jpg": "2015-02-07T02:09:17.000Z", + "MVI_0470.jpg": "2015-02-07T02:06:17.000Z", + "MVI_0495.jpg": "2015-02-07T02:06:01.000Z", +} + +for fname, date in files.items(): + result = subprocess.run([ + "docker", "exec", "immich_postgres", "psql", "-U", "postgres", "-d", "immich", "-t", "-c", + f"SELECT id FROM asset WHERE \"originalPath\" LIKE '%/2024/01/{fname}' AND \"deletedAt\" IS NULL;" + ], capture_output=True, text=True) + asset_id = result.stdout.strip() + + if asset_id: + data = json.dumps({"dateTimeOriginal": date}).encode() + req = Request(f"{API_URL}/assets/{asset_id}", data=data, method="PUT", + headers={"x-api-key": API_KEY, "Content-Type": "application/json"}) + urlopen(req) + print(f"Updated {fname} -> {date[:10]}") + else: + print(f"NOT FOUND: {fname}") diff --git a/update_uuid_dates.py b/update_uuid_dates.py new file mode 100644 index 0000000..018caa4 --- /dev/null +++ b/update_uuid_dates.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +"""Update dates for UUID-named files in Immich.""" + +import subprocess +import json +from urllib.request import Request, urlopen + +API_KEY = "GsWQUTR6EXlkKp1M82jDJ3KmzhM0fMAbbIbfHDyI" +API_URL = "http://localhost:2283/api" + +# UUID files with their correct dates from Mylio XMP +files = { + "09C298F7-2237-4372-A99C-089BA7AE76A8.jpg": "2021-09-25T04:03:24.000Z", + "1BF9EA26-225E-4AAE-B27D-3750C8AAF67A.jpg": "2021-09-25T04:04:06.000Z", + "3CEFC5D9-B100-4AA3-9CCE-121F9CBEFB8C.jpg": "2021-10-04T03:55:08.000Z", + "3F53A10E-8403-4C05-B180-32482554CD0F.jpg": "2021-09-11T03:05:17.000Z", + "41F7B9E3-C476-460D-9A61-AC7794524864.jpg": "2021-12-28T16:57:59.000Z", + "44C13A26-F430-4018-9CD5-5C64A84D9BE1.jpg": "2021-09-23T14:21:59.000Z", + "507C94F6-EB74-4F6E-ACAF-F3D424DB5C30.jpg": "2021-12-28T16:58:36.000Z", + "57E09983-E365-4D55-A6BD-FD48C4E93823.jpg": "2021-09-11T03:05:31.000Z", + "73149BC3-657B-45E1-9F14-AF344CC17B74.jpg": "2021-12-28T16:57:26.000Z", + "80262004-01C7-465F-BFC4-AA822A36EFF2.jpg": "2021-09-25T04:04:32.000Z", + "8068628A-DF39-403E-A4FE-6095B59908D3.jpg": "2021-09-11T02:59:17.000Z", + "81B12ED7-9C2A-457F-86A7-9EDA320607A0.jpg": "2021-09-23T14:22:00.000Z", + "91E396CA-3528-4EDC-951E-FF70FD8768FD.jpg": "2021-12-28T16:57:51.000Z", + "A6AEF278-BB86-49F9-BD93-EE728780213E.jpg": "2021-09-23T14:22:07.000Z", + "B2AA2B92-F554-4E3F-90D3-AE59E3C41751.jpg": "2021-10-04T03:55:33.000Z", + "B4899F17-6BA5-4516-81BD-04CC119D2C3D.jpg": "2021-10-04T03:54:09.000Z", + "B9AD2FBE-5D33-426C-9F0C-08F4EAC27440.jpg": "2021-12-28T16:58:14.000Z", + "BE71C547-F247-4CFD-BF66-65438C45C6C0.jpg": "2021-10-04T03:54:45.000Z", + "CB381DB7-8B95-4581-B53A-D03B5B7EF94C.jpg": "2021-12-28T16:57:14.000Z", + "E2A04304-643D-4569-839A-60A0E6D8071A.jpg": "2021-12-28T16:57:02.000Z", + "ED522636-3584-4DD0-B176-BC0E5F8CCB88.jpg": "2021-10-04T03:54:33.000Z", + "EEE55391-320C-4399-8378-6A42A99617BB.jpg": "2021-12-28T16:57:42.000Z", + "F38214B0-166E-47B7-A2AE-328FB5DC8D8A.jpg": "2021-12-28T16:58:46.000Z", + "F91246B7-039C-4E78-BF15-350E65FDEF44.jpg": "2021-12-28T16:58:27.000Z", + "FB2BCD81-AD35-4A23-8EC2-CC8DAB42714E.jpg": "2021-10-04T03:54:26.000Z", +} + +def get_asset_id(filename): + """Get Immich asset ID from database.""" + result = subprocess.run([ + "docker", "exec", "immich_postgres", "psql", "-U", "postgres", "-d", "immich", "-t", "-c", + f"SELECT id FROM asset WHERE \"originalPath\" LIKE '%/2024/01/{filename}' AND \"deletedAt\" IS NULL;" + ], capture_output=True, text=True) + return result.stdout.strip() + +def update_date(asset_id, date): + """Update asset date via Immich API.""" + data = json.dumps({"dateTimeOriginal": date}).encode() + req = Request(f"{API_URL}/assets/{asset_id}", data=data, method="PUT", + headers={"x-api-key": API_KEY, "Content-Type": "application/json"}) + urlopen(req) + +updated = 0 +for fname, date in files.items(): + asset_id = get_asset_id(fname) + if asset_id: + update_date(asset_id, date) + print(f"Updated {fname} -> {date[:10]}") + updated += 1 + else: + print(f"NOT FOUND: {fname}") + +print(f"\nTotal updated: {updated}")