diff --git a/mime.go b/mime.go index 4970a8a..3143d83 100644 --- a/mime.go +++ b/mime.go @@ -210,49 +210,77 @@ func extractAttachmentParts(entity *message.Entity, attachments *[]Attachment) { } // stripHTML removes HTML tags and returns plain text -func stripHTML(html string) string { - var result strings.Builder +func stripHTML(htmlContent string) string { + // Remove style and script blocks first + result := htmlContent + + // Remove blocks + for { + start := strings.Index(strings.ToLower(result), "") + if end == -1 { + break + } + result = result[:start] + result[start+end+8:] + } + + // Remove blocks + for { + start := strings.Index(strings.ToLower(result), "") + if end == -1 { + break + } + result = result[:start] + result[start+end+9:] + } + + // Remove ... blocks + for { + start := strings.Index(strings.ToLower(result), "") + if end == -1 { + break + } + result = result[:start] + result[start+end+7:] + } + + // Replace common block elements with newlines + result = strings.ReplaceAll(result, "
", "\n") + result = strings.ReplaceAll(result, "
", "\n") + result = strings.ReplaceAll(result, "
", "\n") + result = strings.ReplaceAll(result, "

", "\n") + result = strings.ReplaceAll(result, "", "\n") + result = strings.ReplaceAll(result, "", "\n") + result = strings.ReplaceAll(result, "", "\n") + + // Decode HTML entities + result = decodeHTMLEntities(result) + + // Remove all remaining tags + var builder strings.Builder inTag := false - inStyle := false - inScript := false - - html = strings.ReplaceAll(html, "
", "\n") - html = strings.ReplaceAll(html, "
", "\n") - html = strings.ReplaceAll(html, "
", "\n") - html = strings.ReplaceAll(html, "

", "\n") - html = strings.ReplaceAll(html, "", "\n") - html = decodeHTMLEntities(html) - - for _, r := range html { + for _, r := range result { switch { case r == '<': inTag = true case r == '>': inTag = false - case !inTag && !inStyle && !inScript: - result.WriteRune(r) - } - - // Track style/script blocks (simplified) - if inTag { - lower := strings.ToLower(string(r)) - if strings.Contains(lower, "style") { - inStyle = true - } - if strings.Contains(lower, "/style") { - inStyle = false - } - if strings.Contains(lower, "script") { - inScript = true - } - if strings.Contains(lower, "/script") { - inScript = false - } + case !inTag: + builder.WriteRune(r) } } // Clean up whitespace - text := result.String() + text := builder.String() lines := strings.Split(text, "\n") var cleaned []string for _, line := range lines {