clawd/tanya_mailbox_cleanup.py

368 lines
12 KiB
Python

#!/usr/bin/env python3
"""
Tanya's Mailbox Cleanup Script
Connects to Stalwart via IMAP and deletes marketing/spam emails based on sender rules
"""
import email
import imaplib
import os
import re
import ssl
import time
from collections import defaultdict
# Connection details.
# Each value can be overridden through an environment variable so that the
# credentials do not have to live in the source file; the literals are only
# fallbacks that keep the script's previous behaviour when nothing is set.
# NOTE(review): the fallback password is still a secret stored in plain text --
# rotate it and drop the default once the environment is configured.
IMAP_HOST = os.environ.get('TANYA_IMAP_HOST', 'localhost')
IMAP_PORT = int(os.environ.get('TANYA_IMAP_PORT', '9930'))
USERNAME = os.environ.get('TANYA_IMAP_USER', 'tanya')
PASSWORD = os.environ.get('TANYA_IMAP_PASSWORD', 'Tanya-Migrate-2026!')
# Senders whose mail gets deleted.  Each entry is a substring that is searched
# for, case-insensitively, in the From header.  Entries are grouped under a
# category name purely for readability.
DELETE_RULES = {
    'marketing_retail': [
        'aubade', 'news.aubade.com', 'eshop.aubade.com',
        'landsend', "lands' end",
        'impulsepoledance', 'impulse pole',
        'metagenics',
        'saksfifthavenue', 'saks fifth avenue',
        'gilt', 'e.gilt.com',
        'nordstrom',
        '6pm.com',
        'jetsetter',
        'swimoutlet',  # the store, not USA Swimming
        'mixbook',
        'island company', 'islandcompanyrum',
        'houzz',
        'realself',
        'victoriassecret', "victoria's secret",
        'bellacor',
        'lululemon',
        'agentprovocateur', 'agent provocateur',
        'badkittyexoticwear', 'bad kitty',
        'enews.lenovo.com',
        'lnepresents',
        'macys', "macy's", 'oes.macys.com', 'ops.macys.com',
        'shutterfly',
        'envato',
        'bosshub',
        'mail.eviteideas.com',  # evite marketing, NOT signupgenius
        'rubyrockets',
        'sciencetees', 'nhannhan01072021',
        'funnytshirts', 'dautay24012004',
        'hotelcollection',
        'stpetecountryfest',
        'skinspirations',
        'stpete@bypia.com', 'info@bypia.com', 'bypia', 'pia esthetics',
        'facesofsouthtampa',
        'blackberry',
        '8tracks',
        'villanova.nl',  # fashion store, NOT the school
        'stpetersburgyoga',
    ],
    'spa': [
        'tranquility',  # covers every Tranquility wellness spa variant
    ],
    'newsletters_spam': [
        'optionselite', 'options_elite', 'markantioquia',
        'bestamericanstocks',
        'massageluxe',
        'quora',
        'no-reply@mail.instagram.com',
        'update@volunteerspot.com',  # NOT the signupgenius school ones
        'news-googleplay', 'googleplay-noreply',
        'onedrive photos',
        'reddit noreply',
        'no-reply@news.proton.me',
        'luxerone',
        'appstore@new.itunes.com',
        'alert@listtrac.com',  # Diana Geegan weekly listing reports - delete
        'noreply@mail.usaswimming.org',  # USA Swimming - delete
        'noreply@inou.com',  # inou health - delete
    ],
    'security_alerts': [
        'noreply@email.apple.com',  # Apple security alerts only
        'msa@communication.microsoft.com', 'microsoft account team',
        'microsoft cashback',
        'security-noreply@linkedin.com',
    ],
    'linkedin_marketing': [
        'notifications-noreply@linkedin.com',  # "you appeared in searches"
        'linkedin@e.linkedin.com',  # anniversary/marketing
        'messages-noreply@linkedin.com',  # LinkedIn DMs - now deleted
        'messaging-digest',  # Marina Khliaba LinkedIn - now deleted
        'nick flowers invitations@linkedin.com',
        'curtis tuttle inmail',
    ],
    'dental_marketing': [
        'campaigns@', 'certify',  # St Petersburg Dental Center
        'lwcrm.com', 'no_reply',  # Exceptional Dental
        'flexdental',
    ],
    'shipping_notifications': [
        'notificatie@edm.postnl.nl',  # PostNL tracking only
        'jetblueairways@email.jetblue.com',  # JetBlue bag tracking
    ],
    'scam_spam': [
        'hafnv@nmriv.brickwallsolutions.com',
        'contact@assrinfo.org',
        'admin@tripathiproductions.com',
        'admin@seoreturn.com',
        'wasds3134@gmail.com',
        'unhto-noreply@chaosfurs',
        'veronichowerton9475',
        'lisamarshallqonf2',
        'praphulnayak316',
        'alamnooralam960',
        'noreply@qualtrics',
        'notification@certifyglobal.com',
    ],
    'paypal_marketing': [
        'no_reply@communications',  # PayPal Communications
        'paypal@e.paypal.com',  # PayPal marketing
    ],
    'russian_newsletters': [
        'Новости', 'Карандаш', 'Бэбиблог', 'Оргкомитет',
    ],
}
# Keep rules - a From header containing any of these substrings
# (case-insensitive) is never deleted, even when it also matches a delete rule.
KEEP_RULES = [
    # NOTE: inou.com and usaswimming.org used to be listed here; both were
    # moved to DELETE_RULES.
    'iciparisxl.nl',  # Order confirmations
    'dotloop',  # Diana Geegan transaction docs - KEEP
    # NOTE: the blanket '@gmail.com' entry was removed.  It protected every
    # Gmail sender, which made delete rules such as 'wasds3134@gmail.com'
    # unreachable.  Diana Geegan's personal Gmail is still kept by the
    # dedicated branch in should_keep().
    'iahp.org', 'clinic@', 'finance@', 'bookstore@', 'rumiko', 'miki', 'kathie',
    'johan@jongsma.me', 'tj@jongsma.me',
    'noreply@uber.com', 'no-reply@uber.com',
    'followup@emcell.com',
    'egencia',
    'american airlines',
    'labcorp',
    'ent associates',
    "women's care",
    'shorecrest school',
    'ashley hardy bloomz',
    '8th grade parents',
    'brittany brodeur',
    'n-able benefits',
    'no-reply@account.mintmobile.com', 'chat@mintmobile.com',
    'geri brady',
    'fred lewis', 'punchbowl',
    'google location sharing',
    'google photos partner sharing',
    'curacao immigration',
    'william parsons russian heritage',
    'dr. davis',
    'tora williams volunteer',
    'andi mullins signupgenius',
    'service@paypal.com',  # PayPal invoices
    # NOTE: LinkedIn messages and Marina Khliaba were removed from this list
    # (moved to DELETE_RULES).
    'jury struczewski',
    'news@insideapple.apple.com', 'apple arcade', 'apple payments', 'apple account safety',
    'summer at shorecrest',
]
def matches_pattern(from_header, pattern):
    """Return True when *pattern* occurs in *from_header*, ignoring case.

    Args:
        from_header: Text of a From header; may be empty or None.
        pattern: Substring to look for.

    Returns:
        False when either argument is empty or None, otherwise whether the
        case-folded pattern is a substring of the case-folded header.
    """
    # An empty string is a substring of everything, so an accidental '' in a
    # rule list would match (and therefore delete or keep) every message.
    if not from_header or not pattern:
        return False
    return pattern.casefold() in from_header.casefold()
def should_keep(from_header):
    """Return True when the sender is protected by the keep rules.

    Args:
        from_header: Text of a From header; may be empty or None.

    Returns:
        True when the header matches a keep rule, False otherwise.  A False
        result does not delete anything by itself; it only means the delete
        rules get to decide.
    """
    if not from_header:
        return False
    sender = from_header.lower()
    # Diana Geegan is handled on her own and never consults KEEP_RULES: only
    # her dotloop transaction mail and her personal Gmail are protected.
    # Everything else from her (e.g. the listtrac reports) is left to the
    # delete rules.
    if 'diana geegan' in sender:
        return 'dotloop' in sender or '@gmail.com' in sender
    return any(matches_pattern(from_header, pattern) for pattern in KEEP_RULES)
def should_delete(from_header):
    """Return True when a message from this sender should be deleted.

    Args:
        from_header: Text of a From header; may be empty or None.

    Returns:
        False for an empty/None header or a sender protected by should_keep();
        otherwise True when the header matches any DELETE_RULES pattern.
    """
    # No sender information: never delete on a guess.
    if not from_header:
        return False
    if should_keep(from_header):
        return False
    # Any header mentioning "apple" is kept unless it is the security-alert
    # address; this check runs before (and overrides) the generic rules.
    if 'apple' in from_header.lower():
        return matches_pattern(from_header, 'noreply@email.apple.com')
    return any(
        matches_pattern(from_header, pattern)
        for patterns in DELETE_RULES.values()
        for pattern in patterns
    )
def connect_imap():
    """Open a TLS IMAP connection and log in with the configured credentials.

    Returns:
        The logged-in imaplib.IMAP4_SSL object.

    Raises:
        Exception: whatever the TLS connection or login raised.  Before it is
            re-raised, a plain (non-TLS) connection is attempted purely as a
            diagnostic and its outcome is printed.
    """
    print(f"Connecting to {IMAP_HOST}:{IMAP_PORT}...")
    # Certificate and hostname verification are switched off.
    # NOTE(review): presumably acceptable because the default host is
    # localhost with a self-signed certificate -- confirm before pointing
    # this at a remote server.
    context = ssl.create_default_context()
    context.check_hostname = False
    context.verify_mode = ssl.CERT_NONE

    mail = None
    try:
        mail = imaplib.IMAP4_SSL(IMAP_HOST, IMAP_PORT, ssl_context=context)
        print("SSL connection established")
        print(f"Attempting login for user: {USERNAME}")
        mail.login(USERNAME, PASSWORD)
    except Exception as e:
        print(f"Connection failed: {e}")
        if mail is not None:
            # The TLS session came up but login failed: close the socket
            # instead of leaking it.  Cleanup is best-effort only.
            try:
                mail.logout()
            except (imaplib.IMAP4.error, OSError):
                pass
        # Diagnostic only: check whether the server answers without TLS.
        try:
            print("Trying non-SSL connection to test server response...")
            mail_plain = imaplib.IMAP4(IMAP_HOST, IMAP_PORT)
            try:
                capabilities = mail_plain.capability()
                print(f"Server capabilities: {capabilities}")
            finally:
                mail_plain.logout()
        except Exception as plain_e:
            print(f"Plain connection also failed: {plain_e}")
        # Bare raise re-raises the original failure with its traceback intact.
        raise
    else:
        print("Connected successfully!")
        return mail
# One IMAP LIST response line: "(<flags>) <quoted delimiter or NIL> <mailbox>".
_LIST_LINE_RE = re.compile(
    rb'^\((?P<flags>[^)]*)\)\s+(?:"(?:\\.|[^"\\])*"|NIL)\s+(?P<name>.*)$',
    re.DOTALL,
)


def get_folders(mail):
    """Return the names of all selectable mailboxes on the server.

    Args:
        mail: Object with an imaplib-style ``list()`` method returning
            ``(status, entries)``.

    Returns:
        A list of mailbox names as str, with surrounding quotes removed and
        backslash escapes resolved.  Entries flagged \\Noselect or
        \\NonExistent are skipped because they cannot be opened.  An empty
        list is returned when the LIST command does not report 'OK'.
    """
    status, folders = mail.list()
    if status != 'OK':
        print(f"Could not list folders: {folders}")
        return []

    folder_names = []
    for entry in folders or []:
        # imaplib reports "no mailboxes" as [None].
        if entry is None:
            continue
        literal_name = None
        if isinstance(entry, tuple):
            # The server sent the name as a literal: (response line, name bytes).
            entry, literal_name = entry[0], entry[1]
        match = _LIST_LINE_RE.match(entry)
        if match is None:
            continue
        flags = match.group('flags').lower().split()
        if b'\\noselect' in flags or b'\\nonexistent' in flags:
            continue
        if literal_name is not None:
            raw_name = literal_name
        else:
            raw_name = match.group('name').strip()
            if len(raw_name) >= 2 and raw_name[:1] == b'"' and raw_name[-1:] == b'"':
                # Quoted string: drop the quotes and undo \" and \\ escapes.
                raw_name = re.sub(rb'\\(.)', rb'\1', raw_name[1:-1])
        folder_names.append(raw_name.decode('utf-8', errors='replace'))
    return folder_names
def _quote_mailbox(folder_name):
    """Return *folder_name* as an IMAP quoted string (handles spaces/quotes)."""
    escaped = folder_name.replace('\\', '\\\\').replace('"', '\\"')
    return f'"{escaped}"'


def _extract_from_header(msg_data):
    """Return the decoded, whitespace-normalised From header of a FETCH reply.

    Returns '' when the reply carries no From header.
    """
    from email import message_from_bytes
    from email.errors import HeaderParseError
    from email.header import decode_header, make_header

    for part in msg_data:
        # Only (envelope, payload) tuples carry header bytes.
        if not isinstance(part, tuple):
            continue
        # The message parser looks the header name up case-insensitively.
        raw_value = message_from_bytes(part[1]).get('From')
        if raw_value is None:
            continue
        try:
            # Decode RFC 2047 encoded words (e.g. =?UTF-8?B?...?=) so that
            # non-ASCII display names can be matched against the rules.
            text = str(make_header(decode_header(raw_value)))
        except (HeaderParseError, LookupError, ValueError):
            # Undecodable or unknown charset: fall back to the raw text.
            text = str(raw_value)
        # Collapse folded lines and repeated blanks into single spaces.
        return ' '.join(text.split())
    return ""


def process_folder(mail, folder_name, stats):
    """Flag and expunge every message in one folder that should_delete() selects.

    Args:
        mail: Logged-in imaplib-style connection.
        folder_name: Mailbox name, unquoted.
        stats: Dict with 'deleted' and 'kept' counters; updated in place.

    Errors are printed rather than raised, so one bad message or folder does
    not stop the overall run.
    """
    print(f"\nProcessing folder: {folder_name}")
    try:
        # imaplib sends the mailbox argument verbatim, so it has to be quoted
        # here or names containing spaces are rejected by the server.
        status, messages = mail.select(_quote_mailbox(folder_name))
        if status != 'OK':
            print(f"Cannot select folder {folder_name}: {messages}")
            return
        # Search for all messages
        status, messages = mail.search(None, 'ALL')
        if status != 'OK':
            print(f"Search failed in {folder_name}")
            return
        message_ids = (messages[0] or b'').split() if messages else []
        total_messages = len(message_ids)
        print(f"Found {total_messages} messages in {folder_name}")
        if total_messages == 0:
            return

        deleted_count = 0
        kept_count = 0
        # Messages are still fetched one by one; the batch size only controls
        # how often a progress line is printed.
        batch_size = 100
        for start in range(0, total_messages, batch_size):
            for msg_id in message_ids[start:start + batch_size]:
                try:
                    # BODY.PEEK reads the header without setting \Seen, so
                    # scanning does not mark kept messages as read.
                    status, msg_data = mail.fetch(
                        msg_id, '(BODY.PEEK[HEADER.FIELDS (FROM)])'
                    )
                    if status != 'OK':
                        continue
                    from_header = _extract_from_header(msg_data)
                    if not should_delete(from_header):
                        kept_count += 1
                        stats['kept'] += 1
                        continue
                    # Only flag here; sequence numbers stay valid because the
                    # expunge happens once, after the whole folder is scanned.
                    status, _ = mail.store(msg_id, '+FLAGS', '\\Deleted')
                    if status != 'OK':
                        print(f"Could not flag message {msg_id} for deletion")
                        kept_count += 1
                        stats['kept'] += 1
                        continue
                    deleted_count += 1
                    stats['deleted'] += 1
                    # Log what we're deleting
                    print(f"DELETING: {from_header}")
                except Exception as e:
                    print(f"Error processing message {msg_id}: {e}")
            # Progress update
            processed = min(start + batch_size, total_messages)
            print(f"Processed {processed}/{total_messages} messages...")

        # Expunge deleted messages
        if deleted_count > 0:
            print(f"Expunging {deleted_count} deleted messages...")
            mail.expunge()
        print(f"Folder {folder_name}: {deleted_count} deleted, {kept_count} kept")
    except Exception as e:
        print(f"Error processing folder {folder_name}: {e}")
def main():
    """Run the cleanup over every folder and print a summary.

    Connects, lists the folders, processes each one, prints the totals and
    always closes the connection, even when a step raises.
    """
    print("Starting Tanya's mailbox cleanup...")
    # Connect to IMAP
    mail = connect_imap()
    # Statistics
    stats = {'deleted': 0, 'kept': 0}
    try:
        # Get all folders
        folders = get_folders(mail)
        print(f"Found folders: {folders}")
        # Process each folder
        for folder in folders:
            process_folder(mail, folder, stats)
        # Final summary
        print(f"\n{'='*50}")
        print("CLEANUP COMPLETE!")
        print(f"Total emails deleted: {stats['deleted']}")
        print(f"Total emails kept: {stats['kept']}")
        print(f"{'='*50}")
    finally:
        # CLOSE is only legal while a mailbox is selected; with no folders (or
        # a failed last SELECT) imaplib raises, which must not block LOGOUT.
        try:
            mail.close()
        except (imaplib.IMAP4.error, OSError):
            pass
        mail.logout()
        print("Connection closed.")


if __name__ == "__main__":
    main()