160 lines
3.8 KiB
Go
160 lines
3.8 KiB
Go
package main
|
|
|
|
import (
|
|
"regexp"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// Match the date/time prefix of a message line: M/D/YY, H:MM[narrow-nbsp]AM/PM - (month, day and hour may be one or two digits)
|
|
// The narrow no-break space (\u202f) appears before AM/PM in WhatsApp exports.
|
|
// messageStartRe recognizes the first line of a message. Capture group 1 is
// the timestamp text: date, clock time and an AM/PM marker in any letter case,
// with zero or more ASCII whitespace or U+202F characters before the marker.
// Capture group 2 is everything that follows the " - " separator.
var messageStartRe = regexp.MustCompile(`^(\d{1,2}/\d{1,2}/\d{2}, \d{1,2}:\d{2}[\s\x{202f}]*[APap][Mm]) - (.*)`)
|
|
|
|
// timeLayouts lists the time.Parse layouts for message timestamps. It covers
// both separators seen before the AM/PM marker (narrow no-break space U+202F
// and a regular space) and both letter cases of the marker.
//
// Go's reference layout spells the marker "PM" (upper case) or "pm" (lower
// case). "am" is not a layout token: it would be matched as literal text,
// which rejects "pm" inputs and leaves "12:xx am" as noon instead of
// midnight. The lower-case layouts therefore use "pm".
var timeLayouts = []string{
	"1/2/06, 3:04\u202fPM",
	"1/2/06, 3:04 PM",
	"1/2/06, 3:04\u202fpm",
	"1/2/06, 3:04 pm",
}
|
|
|
|
// parseTimestamp parses the timestamp prefix of an export line, for example
// "1/2/24, 3:04 PM", into a time.Time.
//
// The AM/PM marker may be written in any letter case. The gap between the
// clock time and the marker may be a regular space, a tab, a no-break space
// (U+00A0), a narrow no-break space (U+202F), any run of those, or missing
// altogether. messageStartRe allows zero or more separator characters there,
// so a timestamp such as "3:04PM" must parse here too; otherwise parseChat
// would fold a genuine message into the previous one.
//
// It returns the parsed time and true on success, or the zero time and false
// when s is not a valid timestamp. The layout carries no zone, so time.Parse
// reports the result in UTC.
func parseTimestamp(s string) (time.Time, bool) {
	const (
		layout = "1/2/06, 3:04 PM"
		// Separator characters tolerated before the AM/PM marker. U+202F is
		// already rewritten to a plain space below.
		gapChars = " \t\n\f\r\u00a0"
	)

	// Narrow no-break spaces are treated as plain spaces wherever they occur.
	normalized := strings.ReplaceAll(s, "\u202f", " ")
	if len(normalized) < 2 {
		return time.Time{}, false
	}

	// Split off the two-letter marker and canonicalize it to upper case so a
	// single layout covers every spelling.
	cut := len(normalized) - 2
	meridiem := strings.ToUpper(normalized[cut:])
	if meridiem != "AM" && meridiem != "PM" {
		return time.Time{}, false
	}

	// Collapse whatever separated the clock from the marker (possibly
	// nothing) into the single space the layout expects.
	clock := strings.TrimRight(normalized[:cut], gapChars)

	t, err := time.Parse(layout, clock+" "+meridiem)
	if err != nil {
		return time.Time{}, false
	}
	return t, true
}
|
|
|
|
// classifyAttachment maps an attachment file name to a coarse media category:
// "sticker", "audio", "image", "video" or "document". Matching is
// case-insensitive and the rules are checked in order:
//
//  1. "stk-" prefix together with a ".webp" suffix -> "sticker"
//  2. "ptt-" prefix (any extension)                -> "audio"
//  3. known image extension                        -> "image"
//  4. known video extension                        -> "video"
//  5. known audio extension                        -> "audio"
//
// Anything else is reported as "document".
func classifyAttachment(filename string) string {
	name := strings.ToLower(filename)

	// endsWithAny reports whether name carries one of the given extensions.
	endsWithAny := func(exts ...string) bool {
		for _, e := range exts {
			if strings.HasSuffix(name, e) {
				return true
			}
		}
		return false
	}

	switch {
	case strings.HasPrefix(name, "stk-") && strings.HasSuffix(name, ".webp"):
		return "sticker"
	case strings.HasPrefix(name, "ptt-"):
		return "audio"
	case endsWithAny(".jpg", ".jpeg", ".png", ".webp", ".gif"):
		return "image"
	case endsWithAny(".mp4", ".3gp", ".mov"):
		return "video"
	case endsWithAny(".opus", ".ogg", ".m4a", ".mp3", ".aac"):
		return "audio"
	default:
		return "document"
	}
}
|
|
|
|
func parseChat(content string) []Message {
|
|
// Strip UTF-8 BOM
|
|
content = strings.TrimPrefix(content, "\xef\xbb\xbf")
|
|
// Normalize line endings
|
|
content = strings.ReplaceAll(content, "\r\n", "\n")
|
|
content = strings.ReplaceAll(content, "\r", "\n")
|
|
|
|
lines := strings.Split(content, "\n")
|
|
var messages []Message
|
|
|
|
for i := 0; i < len(lines); i++ {
|
|
line := lines[i]
|
|
if line == "" {
|
|
continue
|
|
}
|
|
|
|
m := messageStartRe.FindStringSubmatch(line)
|
|
if m == nil {
|
|
// Continuation line — append to previous message
|
|
if len(messages) > 0 {
|
|
prev := &messages[len(messages)-1]
|
|
if prev.Text != "" {
|
|
prev.Text += "\n"
|
|
}
|
|
prev.Text += line
|
|
// Check if continuation line has attachment
|
|
checkAttachment(prev, line)
|
|
}
|
|
continue
|
|
}
|
|
|
|
timestampStr := m[1]
|
|
rest := m[2]
|
|
|
|
ts, ok := parseTimestamp(timestampStr)
|
|
if !ok {
|
|
// If we can't parse timestamp, treat as continuation
|
|
if len(messages) > 0 {
|
|
prev := &messages[len(messages)-1]
|
|
if prev.Text != "" {
|
|
prev.Text += "\n"
|
|
}
|
|
prev.Text += line
|
|
}
|
|
continue
|
|
}
|
|
|
|
msg := Message{
|
|
Timestamp: ts,
|
|
}
|
|
|
|
// Check if it's a system message (no colon after sender) or a user message
|
|
colonIdx := strings.Index(rest, ": ")
|
|
if colonIdx == -1 {
|
|
// System message
|
|
msg.IsSystem = true
|
|
msg.Text = rest
|
|
} else {
|
|
msg.Sender = rest[:colonIdx]
|
|
msg.Text = rest[colonIdx+2:]
|
|
}
|
|
|
|
checkAttachment(&msg, msg.Text)
|
|
messages = append(messages, msg)
|
|
}
|
|
|
|
return messages
|
|
}
|
|
|
|
func checkAttachment(msg *Message, text string) {
|
|
const suffix = " (file attached)"
|
|
// Check each line of text for attachments
|
|
for _, line := range strings.Split(text, "\n") {
|
|
line = strings.TrimSpace(line)
|
|
if strings.HasSuffix(line, suffix) {
|
|
filename := strings.TrimSuffix(line, suffix)
|
|
// Don't add duplicate attachments
|
|
found := false
|
|
for _, a := range msg.Attachments {
|
|
if a.Filename == filename {
|
|
found = true
|
|
break
|
|
}
|
|
}
|
|
if !found {
|
|
msg.Attachments = append(msg.Attachments, Attachment{
|
|
Filename: filename,
|
|
Type: classifyAttachment(filename),
|
|
})
|
|
}
|
|
// Remove the "(file attached)" text from displayed text
|
|
msg.Text = strings.Replace(msg.Text, line, filename, 1)
|
|
}
|
|
}
|
|
}
|