package main

import (
	"regexp"
	"strings"
	"time"
)

// messageStartRe matches the prefix that opens a message line in a WhatsApp
// text export: "M/D/YY, H:MM<ws>AM/PM - <rest>".
//
// Capture group 1 is the timestamp and group 2 is everything after " - ".
// The gap before AM/PM may be empty, ASCII whitespace, or the narrow
// no-break space (U+202F) that appears there in WhatsApp exports.
var messageStartRe = regexp.MustCompile(`^(\d{1,2}/\d{1,2}/\d{2}, \d{1,2}:\d{2}[\s\x{202f}]*[APap][Mm]) - (.*)`)

// timeLayouts lists timestamp layouts with both a narrow no-break space and a
// regular space before the AM/PM marker, in upper and lower case.
//
// The lowercase entries use the "pm" reference token: a literal "am" is not a
// layout token, so it would be matched as plain text, never accept "pm", and
// read "12:xx am" as noon. parseTimestamp does not consult this list; it
// canonicalises its input and needs only one layout.
var timeLayouts = []string{
	"1/2/06, 3:04\u202fPM",
	"1/2/06, 3:04 PM",
	"1/2/06, 3:04\u202fpm",
	"1/2/06, 3:04 pm",
}

// parseTimestamp converts a timestamp captured by messageStartRe into a
// time.Time. The boolean result is false when s is not a valid timestamp.
//
// Everything messageStartRe allows between the minutes and the AM/PM marker
// (nothing, ASCII whitespace, U+202F) is collapsed to a single space and the
// marker is upper-cased, so one layout covers every accepted spelling. The
// input carries no zone, so time.Parse returns the time in UTC.
func parseTimestamp(s string) (time.Time, bool) {
	const (
		layout    = "1/2/06, 3:04 PM"
		markerLen = len("PM")
	)

	s = strings.ReplaceAll(s, "\u202f", " ")
	if len(s) <= markerLen {
		return time.Time{}, false
	}

	// Split off the trailing AM/PM marker so the separator in front of it can
	// be normalised independently of what the export actually used.
	clock := strings.TrimRight(s[:len(s)-markerLen], " \t\n\f\r")
	marker := strings.ToUpper(s[len(s)-markerLen:])
	if marker != "AM" && marker != "PM" {
		return time.Time{}, false
	}

	t, err := time.Parse(layout, clock+" "+marker)
	if err != nil {
		return time.Time{}, false
	}
	return t, true
}

// hasAnySuffix reports whether s ends with at least one of suffixes.
func hasAnySuffix(s string, suffixes ...string) bool {
	for _, suffix := range suffixes {
		if strings.HasSuffix(s, suffix) {
			return true
		}
	}
	return false
}

// classifyAttachment maps an attachment filename to one of "sticker",
// "audio", "image", "video" or "document". Matching is case-insensitive.
//
// Prefix rules run before extension rules: "stk-*.webp" is a sticker rather
// than an image, and any "ptt-*" file is audio whatever its extension.
// Anything unrecognised is a "document".
func classifyAttachment(filename string) string {
	lower := strings.ToLower(filename)
	switch {
	case strings.HasPrefix(lower, "stk-") && strings.HasSuffix(lower, ".webp"):
		return "sticker"
	case strings.HasPrefix(lower, "ptt-"):
		return "audio"
	case hasAnySuffix(lower, ".jpg", ".jpeg", ".png", ".webp", ".gif"):
		return "image"
	case hasAnySuffix(lower, ".mp4", ".3gp", ".mov"):
		return "video"
	case hasAnySuffix(lower, ".opus", ".ogg", ".m4a", ".mp3", ".aac"):
		return "audio"
	default:
		return "document"
	}
}

// appendToLast appends line to the text of the most recent message, preceded
// by a newline when that text is non-empty, and returns that message. It
// returns nil, changing nothing, when no message has been parsed yet.
func appendToLast(messages []Message, line string) *Message {
	if len(messages) == 0 {
		return nil
	}
	last := &messages[len(messages)-1]
	if last.Text != "" {
		last.Text += "\n"
	}
	last.Text += line
	return last
}

// parseChat splits the text of a chat export into messages.
//
// A leading UTF-8 BOM is removed and CRLF / CR line endings are normalised to
// LF. Each line matching messageStartRe with a valid timestamp starts a new
// message; if the remainder contains ": " the part before it is the sender,
// otherwise the message is flagged as a system message. Any other non-empty
// line is a continuation and is appended to the previous message's text.
// Empty lines are skipped, and continuation lines that precede the first
// message are dropped.
func parseChat(content string) []Message {
	content = strings.TrimPrefix(content, "\xef\xbb\xbf")
	content = strings.ReplaceAll(content, "\r\n", "\n")
	content = strings.ReplaceAll(content, "\r", "\n")

	var messages []Message
	for _, line := range strings.Split(content, "\n") {
		if line == "" {
			continue
		}

		m := messageStartRe.FindStringSubmatch(line)
		if m == nil {
			// Continuation line: it may itself name an attached file.
			if prev := appendToLast(messages, line); prev != nil {
				checkAttachment(prev, line)
			}
			continue
		}

		ts, ok := parseTimestamp(m[1])
		if !ok {
			// Looks like a message start but the date/time is invalid (for
			// example month 13): keep it as text of the previous message.
			// No attachment scan here, because checkAttachment would take
			// the whole line, date prefix included, as the filename.
			appendToLast(messages, line)
			continue
		}

		msg := Message{Timestamp: ts}
		rest := m[2]
		if sep := strings.Index(rest, ": "); sep == -1 {
			// No "sender: " part, so this is a system notice.
			msg.IsSystem = true
			msg.Text = rest
		} else {
			msg.Sender = rest[:sep]
			msg.Text = rest[sep+2:]
		}

		checkAttachment(&msg, msg.Text)
		messages = append(messages, msg)
	}
	return messages
}

// hasAttachment reports whether attachments already holds an entry for
// filename.
func hasAttachment(attachments []Attachment, filename string) bool {
	for _, a := range attachments {
		if a.Filename == filename {
			return true
		}
	}
	return false
}

// checkAttachment scans text line by line for "<filename> (file attached)"
// markers and updates msg accordingly.
//
// For every marker found, the filename is recorded in msg.Attachments (once
// per distinct filename, typed by classifyAttachment) and the first
// occurrence of the marker line in msg.Text is replaced by the bare filename.
// Lines are compared after trimming surrounding whitespace.
func checkAttachment(msg *Message, text string) {
	const suffix = " (file attached)"

	for _, raw := range strings.Split(text, "\n") {
		line := strings.TrimSpace(raw)
		if !strings.HasSuffix(line, suffix) {
			continue
		}

		filename := strings.TrimSuffix(line, suffix)
		if !hasAttachment(msg.Attachments, filename) {
			msg.Attachments = append(msg.Attachments, Attachment{
				Filename: filename,
				Type:     classifyAttachment(filename),
			})
		}

		// Show only the filename in the displayed text.
		msg.Text = strings.Replace(msg.Text, line, filename, 1)
	}
}