"""Unsubscribe action handler.""" import logging import re from typing import Optional from urllib.parse import urlparse import httpx from ..models import Message logger = logging.getLogger(__name__) def find_unsubscribe_link(message: Message) -> Optional[str]: """Find unsubscribe link in an email message. Checks: 1. List-Unsubscribe header (TODO: needs raw headers) 2. HTML body for common unsubscribe patterns 3. Text body for unsubscribe URLs """ # Search patterns patterns = [ r'href=["\']?(https?://[^"\'>\s]*unsubscribe[^"\'>\s]*)["\']?', r'href=["\']?(https?://[^"\'>\s]*optout[^"\'>\s]*)["\']?', r'href=["\']?(https?://[^"\'>\s]*opt-out[^"\'>\s]*)["\']?', r'href=["\']?(https?://[^"\'>\s]*remove[^"\'>\s]*)["\']?', r'(https?://[^\s<>"]*unsubscribe[^\s<>"]*)', r'(https?://[^\s<>"]*optout[^\s<>"]*)', ] # Search in HTML body first if message.body_html: for pattern in patterns: matches = re.findall(pattern, message.body_html, re.IGNORECASE) if matches: url = matches[0] if _is_valid_unsubscribe_url(url): return url # Search in text body if message.body_text: for pattern in patterns: matches = re.findall(pattern, message.body_text, re.IGNORECASE) if matches: url = matches[0] if _is_valid_unsubscribe_url(url): return url return None def _is_valid_unsubscribe_url(url: str) -> bool: """Validate that a URL looks like a legitimate unsubscribe link.""" try: parsed = urlparse(url) # Must be HTTP(S) if parsed.scheme not in ("http", "https"): return False # Must have a host if not parsed.netloc: return False # Reject obvious non-unsubscribe URLs suspicious = ["login", "password", "account", "download"] for term in suspicious: if term in url.lower() and "unsubscribe" not in url.lower(): return False return True except Exception: return False async def execute_unsubscribe(url: str) -> tuple[bool, str]: """Execute an unsubscribe action by visiting the URL. Returns (success, message). """ try: async with httpx.AsyncClient( timeout=30.0, follow_redirects=True, headers={ "User-Agent": "Mozilla/5.0 (compatible; MailAgent/1.0)", }, ) as client: response = await client.get(url) # Check for success indicators if response.status_code == 200: content = response.text.lower() # Look for success messages success_indicators = [ "unsubscribed", "removed", "successfully", "you have been", "no longer", ] for indicator in success_indicators: if indicator in content: logger.info(f"Unsubscribe successful: {url}") return True, "Successfully unsubscribed" # If we got 200 but no clear success message, assume it worked # (many unsubscribe pages just say "done" or redirect) logger.info(f"Unsubscribe completed (no confirmation): {url}") return True, "Unsubscribe request sent" else: logger.warning(f"Unsubscribe failed: {response.status_code} for {url}") return False, f"HTTP {response.status_code}" except httpx.TimeoutException: logger.error(f"Unsubscribe timeout: {url}") return False, "Request timed out" except Exception as e: logger.error(f"Unsubscribe error: {e}") return False, str(e)