// message-center/mime.go

package main

import (
	"bytes"
	"html"
	"io"
	"mime"
	"mime/quotedprintable"
	"encoding/base64"
	"strings"

	"github.com/emersion/go-message"
	_ "github.com/emersion/go-message/charset"
)

// decodeHTMLEntities replaces HTML character references in s (for example
// "&amp;" or "&#39;") with the characters they stand for.
func decodeHTMLEntities(s string) string {
	unescaped := html.UnescapeString(s)
	return unescaped
}

// ParsedBody contains extracted text and HTML from a MIME message.
type ParsedBody struct {
	// Text is the first text/plain part. When the message has none, it is
	// the tag-stripped form of the first text/html part. When the message
	// cannot be parsed at all, it is the raw input.
	Text string
	// HTML is the first text/html part, or empty when there is none.
	HTML string
}

// ParseMIMEBody reads raw as a MIME message and returns its text/plain and
// text/html content.
//
// When raw cannot be read as a message, the whole input is returned
// unchanged in the Text field and HTML is left empty.
func ParseMIMEBody(raw []byte) ParsedBody {
	var parsed ParsedBody

	root, err := message.Read(bytes.NewReader(raw))
	if err == nil {
		extractParts(root, &parsed)
		return parsed
	}

	// Not a readable message: hand the input back as plain text.
	parsed.Text = string(raw)
	return parsed
}

// extractParts walks entity depth-first and stores the first text/plain and
// the first text/html leaf it meets in result.
//
// A missing or unparseable Content-Type is treated as text/plain. When an
// HTML part is stored and no plain text has been seen yet, result.Text is
// filled with the tag-stripped HTML. Parts of any other media type are
// ignored. Read errors end the walk of the affected part silently.
func extractParts(entity *message.Entity, result *ParsedBody) {
	mediaType, _, err := mime.ParseMediaType(entity.Header.Get("Content-Type"))
	if err != nil {
		mediaType = "text/plain"
	}

	if strings.HasPrefix(mediaType, "multipart/") {
		mr := entity.MultipartReader()
		if mr == nil {
			return
		}
		for {
			part, err := mr.NextPart()
			if err != nil {
				// io.EOF marks the end of the parts; any other error also
				// stops the walk, keeping whatever was collected so far.
				return
			}
			extractParts(part, result)
		}
	}

	// go-message has already undone the Content-Transfer-Encoding and
	// converted the charset to UTF-8 while building the entity (the header
	// itself is left untouched). Decoding again here would corrupt bodies
	// whose decoded text happens to look like base64 or contains "=XX".
	body, err := io.ReadAll(entity.Body)
	if err != nil {
		return
	}
	text := string(body)

	switch mediaType {
	case "text/plain":
		if result.Text == "" {
			result.Text = text
		}
	case "text/html":
		if result.HTML == "" {
			result.HTML = text
			// Derive a plain-text rendering when no text/plain part has
			// been found yet.
			if result.Text == "" {
				result.Text = stripHTML(text)
			}
		}
	}
}

// decodeBody reverses a MIME Content-Transfer-Encoding.
//
// encoding is the header value; it is matched case-insensitively and
// surrounding whitespace is ignored. "quoted-printable" and "base64" are
// decoded; every other value (7bit, 8bit, binary, empty, unknown) returns
// body unchanged. If decoding fails, body is returned unchanged as a
// best-effort fallback rather than a partial result.
func decodeBody(body []byte, encoding string) []byte {
	switch strings.ToLower(strings.TrimSpace(encoding)) {
	case "quoted-printable":
		decoded, err := io.ReadAll(quotedprintable.NewReader(bytes.NewReader(body)))
		if err != nil {
			return body
		}
		return decoded
	case "base64":
		// The standard decoder skips CR and LF on its own, so line-wrapped
		// input needs no pre-cleaning and no second attempt.
		decoded := make([]byte, base64.StdEncoding.DecodedLen(len(body)))
		n, err := base64.StdEncoding.Decode(decoded, body)
		if err != nil {
			return body
		}
		return decoded[:n]
	default:
		return body
	}
}

// Attachment represents an email attachment with content, shaped for JSON
// transport.
type Attachment struct {
	// Filename comes from the Content-Disposition "filename" parameter, or
	// from the Content-Type "name" parameter when the former is absent.
	Filename string `json:"filename"`
	// ContentType is the part's media type without parameters.
	ContentType string `json:"content_type"`
	// Size is the length in bytes of the decoded content (before the base64
	// encoding applied to Content).
	Size int `json:"size"`
	// Content is the decoded attachment data, encoded with standard base64.
	Content string `json:"content"`
}

// ExtractAttachments reads raw as a MIME message and returns every named
// attachment found in it.
//
// The result is never nil: an unreadable message or a message without
// attachments yields an empty slice.
func ExtractAttachments(raw []byte) []Attachment {
	found := []Attachment{} // non-nil even when nothing is collected

	if root, err := message.Read(bytes.NewReader(raw)); err == nil {
		extractAttachmentParts(root, &found)
	}
	return found
}

// extractAttachmentParts walks entity depth-first and appends every leaf
// part that carries a named attachment to *attachments.
//
// A leaf is collected when its Content-Disposition is "attachment" or
// "inline" and a filename is available, taken from the disposition's
// "filename" parameter or, failing that, from the Content-Type "name"
// parameter. Parts whose Content-Type or Content-Disposition cannot be
// parsed, parts without a filename, and parts whose body cannot be read are
// skipped silently.
func extractAttachmentParts(entity *message.Entity, attachments *[]Attachment) {
	mediaType, params, err := mime.ParseMediaType(entity.Header.Get("Content-Type"))
	if err != nil {
		return
	}

	if strings.HasPrefix(mediaType, "multipart/") {
		mr := entity.MultipartReader()
		if mr == nil {
			return
		}
		for {
			part, err := mr.NextPart()
			if err != nil {
				// io.EOF marks the end of the parts; any other error also
				// stops the walk, keeping whatever was collected so far.
				return
			}
			extractAttachmentParts(part, attachments)
		}
	}

	disp := entity.Header.Get("Content-Disposition")
	if disp == "" {
		return
	}
	dispType, dispParams, err := mime.ParseMediaType(disp)
	if err != nil {
		return
	}
	if !strings.EqualFold(dispType, "attachment") && !strings.EqualFold(dispType, "inline") {
		return
	}

	filename := dispParams["filename"]
	if filename == "" {
		filename = params["name"]
	}
	if filename == "" {
		// Unnamed parts are not reported as attachments.
		return
	}

	// go-message has already undone the Content-Transfer-Encoding while
	// building the entity (the header itself is left untouched). Decoding
	// again here would corrupt any file whose bytes happen to form valid
	// base64 or quoted-printable input.
	data, err := io.ReadAll(entity.Body)
	if err != nil {
		return
	}

	*attachments = append(*attachments, Attachment{
		Filename:    filename,
		ContentType: mediaType,
		Size:        len(data),
		// Re-encoded as base64 so binary data survives JSON transport.
		Content: base64.StdEncoding.EncodeToString(data),
	})
}

// stripHTML converts an HTML fragment to plain text.
//
// Tags, comments, declarations and the contents of <script> and <style>
// elements are removed. <br> (in any spelling) and closing </p> and </div>
// tags become line breaks; tag names are matched case-insensitively.
// Character references are decoded after the tags are gone, so escaped
// markup such as "&lt;b&gt;" survives as the literal text "<b>". A '<' that
// does not start a tag is kept as text. Finally every line is trimmed and
// empty lines are dropped.
//
// A tag, comment or script/style element that is never closed swallows the
// remainder of the input.
func stripHTML(markup string) string {
	var text strings.Builder
	text.Grow(len(markup))

	rest := markup
	for {
		lt := strings.IndexByte(rest, '<')
		if lt < 0 {
			text.WriteString(rest)
			break
		}
		text.WriteString(rest[:lt])
		rest = rest[lt:]

		// Comments end at "-->", not at the first '>'.
		if strings.HasPrefix(rest, "<!--") {
			end := strings.Index(rest[4:], "-->")
			if end < 0 {
				break
			}
			rest = rest[4+end+3:]
			continue
		}

		name, closing := htmlTagName(rest)
		if name == "" && !strings.HasPrefix(rest, "<!") && !strings.HasPrefix(rest, "<?") {
			// Not a tag, declaration or processing instruction: the '<' is
			// ordinary text (as in "a < b").
			text.WriteByte('<')
			rest = rest[1:]
			continue
		}

		gt := strings.IndexByte(rest, '>')
		if gt < 0 {
			break
		}
		rest = rest[gt+1:]

		switch {
		case name == "br", closing && (name == "p" || name == "div"):
			text.WriteByte('\n')
		case !closing && (name == "script" || name == "style"):
			// Skip the raw element content up to its closing tag, which the
			// next iteration then discards like any other tag.
			stop := indexCloseTag(rest, name)
			if stop < 0 {
				rest = ""
			} else {
				rest = rest[stop:]
			}
		}
	}

	// Decode entities only now, so decoded '<' and '>' are never mistaken
	// for markup, then normalize whitespace line by line.
	lines := strings.Split(html.UnescapeString(text.String()), "\n")
	kept := lines[:0]
	for _, line := range lines {
		if line = strings.TrimSpace(line); line != "" {
			kept = append(kept, line)
		}
	}
	return strings.Join(kept, "\n")
}

// htmlTagName returns the lower-cased element name of the tag at the start
// of tag (which must begin with '<') and whether it is a closing tag. The
// name is empty when '<' is not followed by a letter (optionally after '/').
func htmlTagName(tag string) (name string, closing bool) {
	i := 1
	if i < len(tag) && tag[i] == '/' {
		closing = true
		i++
	}
	start := i
	for i < len(tag) {
		c := tag[i]
		isLetter := 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z'
		isDigit := '0' <= c && c <= '9'
		if !isLetter && !(isDigit && i > start) {
			break
		}
		i++
	}
	return strings.ToLower(tag[start:i]), closing
}

// indexCloseTag returns the byte offset in s of the first "</name",
// compared case-insensitively, or -1 when there is none.
func indexCloseTag(s, name string) int {
	pat := "</" + name
	for off := 0; ; off++ {
		i := strings.IndexByte(s[off:], '<')
		if i < 0 || off+i+len(pat) > len(s) {
			return -1
		}
		off += i
		if strings.EqualFold(s[off:off+len(pat)], pat) {
			return off
		}
	}
}