126 lines
4.0 KiB
Python
126 lines
4.0 KiB
Python
"""Unsubscribe action handler."""
|
|
import logging
|
|
import re
|
|
from typing import Optional
|
|
from urllib.parse import urlparse
|
|
|
|
import httpx
|
|
|
|
from ..models import Message
|
|
|
|
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def find_unsubscribe_link(message: Message) -> Optional[str]:
    """Find an unsubscribe link in an email message.

    The HTML body (``message.body_html``) is searched first, then the
    plain-text body (``message.body_text``). Within a body the patterns are
    tried in priority order (``href`` attributes before bare URLs), and every
    match of a pattern is considered, so an invalid first hit does not hide a
    valid later one.

    The ``List-Unsubscribe`` header is not consulted yet (TODO: needs raw
    headers).

    Args:
        message: Message whose ``body_html`` / ``body_text`` are scanned.
            Either may be empty or ``None``.

    Returns:
        The first candidate URL accepted by ``_is_valid_unsubscribe_url``, or
        ``None`` when no acceptable link is found.
    """
    # Local import so this function does not depend on a module-level import.
    from html import unescape

    # Ordered by priority: explicit href attributes first, bare URLs last.
    patterns = [
        r'href=["\']?(https?://[^"\'>\s]*unsubscribe[^"\'>\s]*)["\']?',
        r'href=["\']?(https?://[^"\'>\s]*optout[^"\'>\s]*)["\']?',
        r'href=["\']?(https?://[^"\'>\s]*opt-out[^"\'>\s]*)["\']?',
        r'href=["\']?(https?://[^"\'>\s]*remove[^"\'>\s]*)["\']?',
        r'(https?://[^\s<>"]*unsubscribe[^\s<>"]*)',
        r'(https?://[^\s<>"]*optout[^\s<>"]*)',
    ]
    compiled = [re.compile(pattern, re.IGNORECASE) for pattern in patterns]

    def first_valid(body: str, decode_entities: bool) -> Optional[str]:
        """Return the first valid candidate URL in *body*, or None."""
        for regex in compiled:
            for candidate in regex.findall(body):
                # URLs inside HTML are entity-encoded ("&amp;" for "&");
                # decode them so the query string survives intact.
                url = unescape(candidate) if decode_entities else candidate
                if _is_valid_unsubscribe_url(url):
                    return url
        return None

    # HTML body first, then the plain-text body.
    sources = (
        (message.body_html, True),
        (message.body_text, False),
    )
    for body, is_html in sources:
        if not body:
            continue
        found = first_valid(body, is_html)
        if found is not None:
            return found

    return None
|
|
|
|
|
|
def _is_valid_unsubscribe_url(url: str) -> bool:
|
|
"""Validate that a URL looks like a legitimate unsubscribe link."""
|
|
try:
|
|
parsed = urlparse(url)
|
|
|
|
# Must be HTTP(S)
|
|
if parsed.scheme not in ("http", "https"):
|
|
return False
|
|
|
|
# Must have a host
|
|
if not parsed.netloc:
|
|
return False
|
|
|
|
# Reject obvious non-unsubscribe URLs
|
|
suspicious = ["login", "password", "account", "download"]
|
|
for term in suspicious:
|
|
if term in url.lower() and "unsubscribe" not in url.lower():
|
|
return False
|
|
|
|
return True
|
|
except Exception:
|
|
return False
|
|
|
|
|
|
async def execute_unsubscribe(url: str) -> tuple[bool, str]:
    """Execute an unsubscribe action by issuing a GET request to *url*.

    The request follows redirects and uses a 30-second timeout. Any 2xx final
    response counts as success; the response body is only inspected to choose
    between a "confirmed" and an "unconfirmed" success message.

    Args:
        url: The unsubscribe URL to visit.

    Returns:
        ``(success, message)``. On failure ``message`` is ``"HTTP <status>"``,
        ``"Request timed out"``, or a description of the exception. Errors
        raised while performing the request are caught and reported through
        the return value rather than propagated.
    """
    # Phrases that suggest the page confirmed the unsubscribe.
    success_indicators = (
        "unsubscribed",
        "removed",
        "successfully",
        "you have been",
        "no longer",
    )

    try:
        async with httpx.AsyncClient(
            timeout=30.0,
            follow_redirects=True,
            headers={
                "User-Agent": "Mozilla/5.0 (compatible; MailAgent/1.0)",
            },
        ) as client:
            response = await client.get(url)

            # Accept the whole 2xx range: endpoints may answer 202 or 204
            # instead of 200 and those are successes too.
            if not 200 <= response.status_code < 300:
                logger.warning(
                    "Unsubscribe failed: %s for %s", response.status_code, url
                )
                return False, f"HTTP {response.status_code}"

            content = response.text.lower()
            if any(indicator in content for indicator in success_indicators):
                logger.info("Unsubscribe successful: %s", url)
                return True, "Successfully unsubscribed"

            # Success status but no clear confirmation text: assume it worked
            # (many unsubscribe pages just say "done" or redirect).
            logger.info("Unsubscribe completed (no confirmation): %s", url)
            return True, "Unsubscribe request sent"

    except httpx.TimeoutException:
        logger.error("Unsubscribe timeout: %s", url)
        return False, "Request timed out"

    except Exception as e:
        # Boundary handler: callers expect a (False, reason) tuple, not an
        # exception. Include the URL so the log entry is actionable, and fall
        # back to the exception type when its message is empty.
        reason = str(e) or type(e).__name__
        logger.error("Unsubscribe error for %s: %s", url, reason)
        return False, reason
|