#!/usr/bin/env python3
"""
Tanya's Mailbox Cleanup Script

Connects to Stalwart via IMAP and deletes marketing/spam emails based on sender rules.
"""
|
|
|
|
import email
import imaplib
import os
import re
import ssl
import time
from collections import defaultdict
from email.errors import HeaderParseError
from email.header import decode_header, make_header
|
|
|
|
# Connection details.
# Each value can be overridden through an environment variable so that the
# credentials do not have to be edited into (or committed with) this file.
# The literals are kept as fallbacks, so running the script with no
# environment configured behaves exactly as before.
IMAP_HOST = os.environ.get('MAILBOX_CLEANUP_HOST', 'localhost')
IMAP_PORT = int(os.environ.get('MAILBOX_CLEANUP_PORT', '9930'))
USERNAME = os.environ.get('MAILBOX_CLEANUP_USERNAME', 'tanya')
PASSWORD = os.environ.get('MAILBOX_CLEANUP_PASSWORD', 'Tanya-Migrate-2026!')
|
|
|
|
# Delete rules: category name -> list of substrings. A message is a deletion
# candidate when any substring occurs (case-insensitively) in its From header.
# NOTE(review): some entries ('certify', 'no_reply', 'campaigns@', 'gilt',
# 'blackberry') are short substrings and will match any sender containing
# them - confirm that this breadth is intended.
DELETE_RULES = {
    'marketing_retail': [
        'aubade',
        'news.aubade.com',
        'eshop.aubade.com',
        'landsend',
        "lands' end",
        'impulsepoledance',
        'impulse pole',
        'metagenics',
        'saksfifthavenue',
        'saks fifth avenue',
        'gilt',
        'e.gilt.com',
        'nordstrom',
        '6pm.com',
        'jetsetter',
        'swimoutlet',  # the store, not USA Swimming
        'mixbook',
        'island company',
        'islandcompanyrum',
        'houzz',
        'realself',
        'victoriassecret',
        "victoria's secret",
        'bellacor',
        'lululemon',
        'agentprovocateur',
        'agent provocateur',
        'badkittyexoticwear',
        'bad kitty',
        'enews.lenovo.com',
        'lnepresents',
        'macys',
        "macy's",
        'oes.macys.com',
        'ops.macys.com',
        'shutterfly',
        'envato',
        'bosshub',
        'mail.eviteideas.com',  # evite marketing, NOT signupgenius
        'rubyrockets',
        'sciencetees',
        'nhannhan01072021',
        'funnytshirts',
        'dautay24012004',
        'hotelcollection',
        'stpetecountryfest',
        'skinspirations',
        'stpete@bypia.com',
        'info@bypia.com',
        'bypia',
        'pia esthetics',
        'facesofsouthtampa',
        'blackberry',
        '8tracks',
        'villanova.nl',  # fashion store, NOT the school
        'stpetersburgyoga',
    ],
    'spa': [
        'tranquility',  # all Tranquility wellness spa variants
    ],
    'newsletters_spam': [
        'optionselite',
        'options_elite',
        'markantioquia',
        'bestamericanstocks',
        'massageluxe',
        'quora',
        'no-reply@mail.instagram.com',
        'update@volunteerspot.com',  # NOT the signupgenius school ones
        'news-googleplay',
        'googleplay-noreply',
        'onedrive photos',
        'reddit noreply',
        'no-reply@news.proton.me',
        'luxerone',
        'appstore@new.itunes.com',
        'alert@listtrac.com',  # Diana Geegan weekly listing reports - delete
        'noreply@mail.usaswimming.org',  # USA Swimming - delete
        'noreply@inou.com',  # inou health - delete
    ],
    'security_alerts': [
        'noreply@email.apple.com',  # Apple security alerts only
        'msa@communication.microsoft.com',
        'microsoft account team',
        'microsoft cashback',
        'security-noreply@linkedin.com',
    ],
    'linkedin_marketing': [
        'notifications-noreply@linkedin.com',  # "you appeared in searches"
        'linkedin@e.linkedin.com',  # anniversary/marketing
        'messages-noreply@linkedin.com',  # LinkedIn DMs - now deleted
        'messaging-digest',  # Marina Khliaba LinkedIn - now deleted
        'nick flowers invitations@linkedin.com',
        'curtis tuttle inmail',
    ],
    'dental_marketing': [
        'campaigns@',  # St Petersburg Dental Center
        'certify',  # St Petersburg Dental Center
        'lwcrm.com',  # Exceptional Dental
        'no_reply',  # Exceptional Dental
        'flexdental',
    ],
    'shipping_notifications': [
        'notificatie@edm.postnl.nl',  # PostNL tracking only
        'jetblueairways@email.jetblue.com',  # JetBlue bag tracking
    ],
    'scam_spam': [
        'hafnv@nmriv.brickwallsolutions.com',
        'contact@assrinfo.org',
        'admin@tripathiproductions.com',
        'admin@seoreturn.com',
        'wasds3134@gmail.com',
        'unhto-noreply@chaosfurs',
        'veronichowerton9475',
        'lisamarshallqonf2',
        'praphulnayak316',
        'alamnooralam960',
        'noreply@qualtrics',
        'notification@certifyglobal.com',
    ],
    'paypal_marketing': [
        'no_reply@communications',  # PayPal Communications
        'paypal@e.paypal.com',  # PayPal marketing
    ],
    'russian_newsletters': [
        'Новости',
        'Карандаш',
        'Бэбиблог',
        'Оргкомитет',
    ],
}
|
|
|
|
# Keep rules: substrings that protect a message from deletion. A sender whose
# From header contains any of these (case-insensitively) is never deleted,
# even when a delete rule also matches.
#
# The former bare '@gmail.com' entry has been removed: it protected *every*
# Gmail sender, which made delete rules that target Gmail addresses (for
# example 'wasds3134@gmail.com') impossible to trigger. Diana Geegan's
# personal Gmail is still kept by the dedicated special case in should_keep().
KEEP_RULES = [
    # NOTE: inou.com and usaswimming.org were moved to the delete rules.
    'iciparisxl.nl',  # order confirmations
    'dotloop',  # Diana Geegan transaction docs - KEEP
    'iahp.org',
    'clinic@',
    'finance@',
    'bookstore@',
    'rumiko',
    'miki',
    'kathie',
    'johan@jongsma.me',
    'tj@jongsma.me',
    'noreply@uber.com',
    'no-reply@uber.com',
    'followup@emcell.com',
    'egencia',
    'american airlines',
    'labcorp',
    'ent associates',
    "women's care",
    'shorecrest school',
    'ashley hardy bloomz',
    '8th grade parents',
    'brittany brodeur',
    'n-able benefits',
    'no-reply@account.mintmobile.com',
    'chat@mintmobile.com',
    'geri brady',
    'fred lewis',
    'punchbowl',
    'google location sharing',
    'google photos partner sharing',
    'curacao immigration',
    'william parsons russian heritage',
    'dr. davis',
    'tora williams volunteer',
    'andi mullins signupgenius',
    'service@paypal.com',  # PayPal invoices
    # NOTE: LinkedIn messages and Marina Khliaba were moved to the delete rules.
    'jury struczewski',
    'news@insideapple.apple.com',
    'apple arcade',
    'apple payments',
    'apple account safety',
    'summer at shorecrest',
]
|
|
|
|
def matches_pattern(from_header, pattern):
    """Return True when `pattern` occurs in `from_header`, ignoring case.

    An empty or missing header never matches.
    """
    return bool(from_header) and pattern.lower() in from_header.lower()
|
|
|
|
def should_keep(from_header):
    """Return True when the sender is protected from deletion.

    Args:
        from_header: Text of the message's From header; may be empty or None.

    Returns:
        True if the message must be kept, False if the keep rules do not
        protect it (the delete rules are then free to apply).
    """
    # A missing header cannot match any keep rule; guard here so that None
    # does not crash on .lower(), consistent with matches_pattern().
    if not from_header:
        return False

    sender = from_header.lower()

    # Special case: Diana Geegan - keep only her dotloop transaction documents
    # and personal Gmail. Everything else from her (including the listtrac
    # weekly reports) is deliberately not protected.
    if 'diana geegan' in sender:
        return 'dotloop' in sender or '@gmail.com' in sender

    return any(matches_pattern(from_header, pattern) for pattern in KEEP_RULES)
|
|
|
|
def should_delete(from_header):
    """Return True when the message should be deleted.

    Keep rules always win over delete rules.

    Args:
        from_header: Text of the message's From header; may be empty or None.

    Returns:
        True if the sender matches a delete rule and is not protected,
        otherwise False.
    """
    # No sender information means no rule can match; also avoids calling
    # .lower() on None.
    if not from_header or should_keep(from_header):
        return False

    # Special case: Apple - only the security-alert address is deleted; any
    # other header containing "apple" is kept regardless of the delete rules.
    if 'apple' in from_header.lower():
        return matches_pattern(from_header, 'noreply@email.apple.com')

    # The category names are only for organisation; any pattern may match.
    return any(
        matches_pattern(from_header, pattern)
        for patterns in DELETE_RULES.values()
        for pattern in patterns
    )
|
|
|
|
def _probe_plain_imap():
    """Diagnostic only: try an unencrypted connection and print the server capabilities."""
    print("Trying non-SSL connection to test server response...")
    try:
        mail_plain = imaplib.IMAP4(IMAP_HOST, IMAP_PORT)
    except (OSError, imaplib.IMAP4.error) as plain_e:
        print(f"Plain connection also failed: {plain_e}")
        return

    try:
        capabilities = mail_plain.capability()
        print(f"Server capabilities: {capabilities}")
    except (OSError, imaplib.IMAP4.error) as plain_e:
        print(f"Plain connection also failed: {plain_e}")
    finally:
        # Always release the probe socket, even when CAPABILITY failed.
        try:
            mail_plain.logout()
        except (OSError, imaplib.IMAP4.error):
            pass


def connect_imap():
    """Open a TLS connection to the IMAP server and log in.

    Returns:
        An authenticated imaplib.IMAP4_SSL connection.

    Raises:
        OSError: The TLS connection could not be established.
        imaplib.IMAP4.error: The server rejected the connection or the login.
    """
    print(f"Connecting to {IMAP_HOST}:{IMAP_PORT}...")

    # Certificate and hostname verification are switched off.
    # NOTE(review): presumably because the server presents a self-signed
    # certificate - confirm before pointing this at a remote host.
    context = ssl.create_default_context()
    context.check_hostname = False
    context.verify_mode = ssl.CERT_NONE

    try:
        mail = imaplib.IMAP4_SSL(IMAP_HOST, IMAP_PORT, ssl_context=context)
    except (OSError, imaplib.IMAP4.error) as e:
        print(f"Connection failed: {e}")
        # Only a connection-level failure is worth probing in plaintext.
        _probe_plain_imap()
        raise  # bare raise keeps the original traceback

    print("SSL connection established")

    # Debug mode disabled for production run
    # mail.debug = 1

    print(f"Attempting login for user: {USERNAME}")
    try:
        mail.login(USERNAME, PASSWORD)
    except (OSError, imaplib.IMAP4.error) as e:
        print(f"Login failed: {e}")
        # The server is reachable, so skip the plaintext probe (it would be
        # talking cleartext to a TLS port) and release the TLS connection.
        try:
            mail.logout()
        except (OSError, imaplib.IMAP4.error):
            pass
        raise

    print("Connected successfully!")
    return mail
|
|
|
|
# One line of an IMAP LIST response: (flags) delimiter mailbox-name.
# The delimiter is a quoted string (any character) or NIL.
_LIST_LINE = re.compile(
    r'\((?P<flags>[^)]*)\)\s+(?P<delimiter>"(?:[^"\\]|\\.)*"|NIL)\s+(?P<name>.+)',
    re.IGNORECASE,
)


def get_folders(mail):
    """Return the names of all mailboxes reported by the server.

    Args:
        mail: A logged-in IMAP connection providing `list()`.

    Returns:
        A list of mailbox names with surrounding double quotes removed.
        Empty when the listing fails or the account has no mailboxes.
    """
    status, folders = mail.list()
    if status != 'OK':
        print(f"Could not list folders: {folders}")
        return []

    folder_names = []
    for folder in folders:
        # A mailbox name sent as an IMAP literal arrives as a
        # (header, literal) tuple; the literal is the name itself.
        if isinstance(folder, tuple):
            if len(folder) >= 2 and isinstance(folder[1], bytes):
                folder_names.append(folder[1].decode('utf-8', errors='replace'))
            continue

        # imaplib reports an empty listing as [None]; skip anything non-bytes.
        if not isinstance(folder, bytes):
            continue

        match = _LIST_LINE.match(folder.decode('utf-8', errors='replace'))
        if match:
            folder_names.append(match.group('name').strip().strip('"'))

    return folder_names
|
|
|
|
def _quote_mailbox(folder_name):
    """Return folder_name as an IMAP quoted string (backslash and quote escaped)."""
    escaped = folder_name.replace('\\', '\\\\').replace('"', '\\"')
    return f'"{escaped}"'


def _extract_from_header(msg_data):
    """Return the decoded, single-line From value of a FETCH response ('' if absent)."""
    for response_part in msg_data:
        if not isinstance(response_part, tuple):
            continue

        raw = response_part[1].decode('utf-8', errors='ignore')
        # Header field names are case-insensitive, so accept any casing.
        match = re.search(r'^from:(.*)', raw, re.IGNORECASE | re.MULTILINE | re.DOTALL)
        if not match:
            continue

        # Unfold continuation lines and collapse whitespace.
        value = ' '.join(match.group(1).split())
        try:
            # Decode RFC 2047 encoded words so that rules written in plain
            # text (including non-ASCII names) can match the display name.
            return str(make_header(decode_header(value)))
        except (HeaderParseError, LookupError, UnicodeError, ValueError):
            # Malformed encoding: fall back to the raw header text.
            return value

    return ""


def process_folder(mail, folder_name, stats):
    """Flag and expunge every message in one folder that matches the delete rules.

    Args:
        mail: A logged-in IMAP connection.
        folder_name: Unquoted mailbox name to process.
        stats: Dict with integer 'deleted' and 'kept' counters; updated in place.

    Errors are printed rather than raised, so one bad folder does not stop
    the overall run.
    """
    print(f"\nProcessing folder: {folder_name}")

    try:
        # The name is quoted explicitly so that folders containing spaces
        # form a valid SELECT command.
        status, messages = mail.select(_quote_mailbox(folder_name))
        if status != 'OK':
            print(f"Cannot select folder {folder_name}: {messages}")
            return

        # Search for all messages
        status, messages = mail.search(None, 'ALL')
        if status != 'OK':
            print(f"Search failed in {folder_name}")
            return

        message_ids = (messages[0] or b'').split()
        total_messages = len(message_ids)

        print(f"Found {total_messages} messages in {folder_name}")

        if total_messages == 0:
            return

        deleted_count = 0
        kept_count = 0

        # Messages are walked in fixed-size groups only so that a progress
        # line can be printed after each group.
        batch_size = 100
        for start in range(0, total_messages, batch_size):
            for msg_id in message_ids[start:start + batch_size]:
                try:
                    # BODY.PEEK reads the header without setting the \Seen
                    # flag, so kept messages are not marked as read.
                    status, msg_data = mail.fetch(msg_id, '(BODY.PEEK[HEADER.FIELDS (FROM)])')
                    if status != 'OK':
                        continue

                    from_header = _extract_from_header(msg_data)

                    flagged = False
                    if should_delete(from_header):
                        # Count the deletion only if the server accepted the flag.
                        status, _ = mail.store(msg_id, '+FLAGS', '\\Deleted')
                        flagged = status == 'OK'
                        if flagged:
                            print(f"DELETING: {from_header}")
                        else:
                            print(f"Could not flag message {msg_id} for deletion: {from_header}")

                    if flagged:
                        deleted_count += 1
                        stats['deleted'] += 1
                    else:
                        kept_count += 1
                        stats['kept'] += 1

                except imaplib.IMAP4.abort:
                    # The connection is gone; retrying per message would only
                    # repeat the same error, so let the folder handler report it.
                    raise
                except Exception as e:
                    print(f"Error processing message {msg_id}: {e}")

            # Progress update
            processed = min(start + batch_size, total_messages)
            print(f"Processed {processed}/{total_messages} messages...")

        # Expunge deleted messages
        if deleted_count > 0:
            print(f"Expunging {deleted_count} deleted messages...")
            mail.expunge()

        print(f"Folder {folder_name}: {deleted_count} deleted, {kept_count} kept")

    except Exception as e:
        print(f"Error processing folder {folder_name}: {e}")
|
|
|
|
def main():
    """Run the cleanup: connect, process every folder, print totals, disconnect.

    The connection is always released, even when listing or processing
    raises part-way through.
    """
    print("Starting Tanya's mailbox cleanup...")

    # Connect to IMAP
    mail = connect_imap()

    # Statistics
    stats = {'deleted': 0, 'kept': 0}

    try:
        # Get all folders
        folders = get_folders(mail)
        print(f"Found folders: {folders}")

        # Process each folder
        for folder in folders:
            process_folder(mail, folder, stats)

        # Final summary
        print(f"\n{'='*50}")
        print("CLEANUP COMPLETE!")
        print(f"Total emails deleted: {stats['deleted']}")
        print(f"Total emails kept: {stats['kept']}")
        print(f"{'='*50}")
    finally:
        # CLOSE is only legal while a mailbox is selected; when there were no
        # folders or the last select failed it raises, which must not prevent
        # the logout below.
        try:
            mail.close()
        except (imaplib.IMAP4.error, OSError):
            pass

        try:
            mail.logout()
        except (imaplib.IMAP4.error, OSError) as e:
            print(f"Logout failed: {e}")
        print("Connection closed.")
|
|
|
|
# Run the cleanup only when executed as a script, not when imported.
if __name__ == "__main__":
    main()