mail-agent/src/actions/unsubscribe.py

126 lines
4.0 KiB
Python

"""Unsubscribe action handler."""
import logging
import re
from typing import Optional
from urllib.parse import urlparse
import httpx
from ..models import Message
logger = logging.getLogger(__name__)
def find_unsubscribe_link(message: Message) -> Optional[str]:
    """Find an unsubscribe link in an email message.

    Sources, in order of preference:
      1. List-Unsubscribe header (TODO: needs raw headers; not implemented)
      2. HTML body, searched for common unsubscribe link patterns
      3. Text body, searched with the same patterns

    Args:
        message: Message whose ``body_html`` and ``body_text`` attributes
            are searched. Either may be empty or None, in which case that
            body is skipped.

    Returns:
        The first candidate URL accepted by ``_is_valid_unsubscribe_url``,
        or None when no body yields an acceptable candidate.
    """
    # Ordered from most to least specific: explicit href attributes first,
    # then bare URLs appearing anywhere in the body.
    patterns = [
        r'href=["\']?(https?://[^"\'>\s]*unsubscribe[^"\'>\s]*)["\']?',
        r'href=["\']?(https?://[^"\'>\s]*optout[^"\'>\s]*)["\']?',
        r'href=["\']?(https?://[^"\'>\s]*opt-out[^"\'>\s]*)["\']?',
        r'href=["\']?(https?://[^"\'>\s]*remove[^"\'>\s]*)["\']?',
        r'(https?://[^\s<>"]*unsubscribe[^\s<>"]*)',
        r'(https?://[^\s<>"]*optout[^\s<>"]*)',
    ]

    # HTML body is searched before the text body.
    bodies = (
        (message.body_html, True),
        (message.body_text, False),
    )

    for body, is_html in bodies:
        if not body:
            continue
        for pattern in patterns:
            # Try every match, not just the first one: an earlier match may
            # be rejected by validation while a later one is acceptable.
            for candidate in re.findall(pattern, body, re.IGNORECASE):
                url = candidate
                if is_html:
                    # URLs inside HTML markup have "&" escaped as "&amp;";
                    # undo that so query parameters survive the request.
                    # Only this entity is decoded on purpose: a general
                    # entity decoder would also rewrite raw parameters such
                    # as "&reg=" or "&copy=" in loosely-encoded markup.
                    url = url.replace("&amp;", "&")
                if _is_valid_unsubscribe_url(url):
                    return url

    return None
def _is_valid_unsubscribe_url(url: str) -> bool:
    """Validate that a URL looks like a legitimate unsubscribe link.

    A URL is accepted when it:
      * parses without error,
      * uses the ``http`` or ``https`` scheme,
      * names an actual host (not just a port or userinfo), and
      * does not contain a suspicious term ("login", "password", "account",
        "download") unless it also contains "unsubscribe".

    Args:
        url: Candidate URL.

    Returns:
        True if the URL passes every check, False otherwise. Never raises.
    """
    # Non-string input can never be a usable URL; reject instead of raising.
    if not isinstance(url, str):
        return False

    try:
        parsed = urlparse(url)
        host = parsed.hostname
    except ValueError:
        # urlparse raises ValueError for malformed authorities, e.g. an
        # unclosed IPv6 bracket.
        return False

    # Must be HTTP(S); urlparse lower-cases the scheme for us.
    if parsed.scheme not in ("http", "https"):
        return False

    # Must have a host. ``hostname`` is checked rather than ``netloc``
    # because netloc is non-empty for authorities like ":80" or "user@"
    # that carry no host at all.
    if not host:
        return False

    # Reject obvious non-unsubscribe URLs, unless the URL also mentions
    # "unsubscribe" explicitly.
    suspicious = ("login", "password", "account", "download")
    lowered = url.lower()
    if "unsubscribe" not in lowered and any(term in lowered for term in suspicious):
        return False

    return True
async def execute_unsubscribe(url: str) -> tuple[bool, str]:
    """Execute an unsubscribe action by visiting the URL.

    Issues a single GET request with a 30 second timeout, following
    redirects. Any 2xx final status is treated as success; the response
    body is only used to choose between the two success messages.

    NOTE(review): the URL is fetched as given and redirects are followed;
    nothing in this function restricts the target host. Confirm that
    callers validate the URL before passing it in.

    Args:
        url: The unsubscribe URL to request.

    Returns:
        ``(success, message)``:
          * ``(True, "Successfully unsubscribed")`` when the body contains
            a recognised confirmation phrase,
          * ``(True, "Unsubscribe request sent")`` for a 2xx response
            without one,
          * ``(False, "HTTP <status>")`` for a non-2xx response,
          * ``(False, "Request timed out")`` on timeout,
          * ``(False, <error text>)`` on any other error.
        Never raises; all errors are logged and reported in the tuple.
    """
    # Phrases whose presence in the (lower-cased) response body is taken as
    # explicit confirmation.
    success_indicators = (
        "unsubscribed",
        "removed",
        "successfully",
        "you have been",
        "no longer",
    )

    try:
        async with httpx.AsyncClient(
            timeout=30.0,
            follow_redirects=True,
            headers={
                "User-Agent": "Mozilla/5.0 (compatible; MailAgent/1.0)",
            },
        ) as client:
            response = await client.get(url)

            # Accept the whole 2xx range: endpoints may answer with
            # 202 Accepted or 204 No Content rather than 200.
            if not 200 <= response.status_code < 300:
                logger.warning(
                    "Unsubscribe failed: %s for %s", response.status_code, url
                )
                return False, f"HTTP {response.status_code}"

            content = response.text.lower()
            if any(indicator in content for indicator in success_indicators):
                logger.info("Unsubscribe successful: %s", url)
                return True, "Successfully unsubscribed"

            # Success status but no clear confirmation text: assume it
            # worked (many unsubscribe pages just say "done" or redirect).
            logger.info("Unsubscribe completed (no confirmation): %s", url)
            return True, "Unsubscribe request sent"
    except httpx.TimeoutException:
        logger.error("Unsubscribe timeout: %s", url)
        return False, "Request timed out"
    except Exception as e:
        # Boundary handler: callers rely on the (success, message) tuple.
        # Some exceptions stringify to "", so fall back to the class name
        # to keep the message informative.
        detail = str(e) or type(e).__name__
        logger.error("Unsubscribe error: %s", detail)
        return False, detail