156 lines
5.4 KiB
Python
156 lines
5.4 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Post-process merged.vcf:
|
|
1. Remove contacts with no email AND no phone
|
|
2. Remove contacts with no name (FN or ORG) — list their emails for review
|
|
3. Convert Dutch phone numbers (0X...) to international format (+31...)
|
|
"""
|
|
|
|
import re, sys
|
|
|
|
# Path of the merged vCard file that main() reads.
INPUT = "/home/johan/clawd/tmp/contacts/merged.vcf"
|
|
# Path of the cleaned vCard file that main() writes.
OUTPUT = "/home/johan/clawd/tmp/contacts/final.vcf"
|
|
|
|
# ── vCard parser (simple block-based) ─────────────────────────────────────────
|
|
|
|
def parse_blocks(path):
    """Read a vCard file and return its cards as a list of text blocks.

    The file is decoded as UTF-8 (undecodable bytes are replaced), folded
    continuation lines are joined back onto the line they continue, and the
    text is cut in front of every ``BEGIN:VCARD`` marker, matched
    case-insensitively. Pieces that do not start with that marker once
    trimmed, such as text ahead of the first card, are discarded.
    """
    with open(path, encoding='utf-8', errors='replace') as handle:
        text = handle.read()

    # A line break followed by a space or tab is a folded continuation.
    unfolded = re.sub(r'\r?\n[ \t]', '', text)

    # The zero-width lookahead keeps each marker attached to its own card.
    pieces = (
        piece.strip()
        for piece in re.split(r'(?=BEGIN:VCARD)', unfolded, flags=re.I)
    )
    return [card for card in pieces if card.upper().startswith('BEGIN:VCARD')]
|
|
|
|
def block_lines(block):
    """Return the content lines of a card, without its wrapper lines.

    Blank and whitespace-only lines are dropped, as are lines that equal
    ``BEGIN:VCARD``, ``END:VCARD`` or ``VERSION:3.0`` once upper-cased.
    The comparison is made on the unstripped line, so a wrapper line with
    surrounding whitespace is kept.
    """
    wrapper_lines = ('BEGIN:VCARD', 'END:VCARD', 'VERSION:3.0')
    content = []
    for entry in block.splitlines():
        if not entry.strip():
            continue
        if entry.upper() in wrapper_lines:
            continue
        content.append(entry)
    return content
|
|
|
|
def get_field_val(block, prefix):
    """Return the value of the first line whose property name starts with prefix.

    Each line is split at its first colon. The part before the colon is
    matched case-insensitively against ``prefix``, optionally preceded by an
    ``itemN.`` group; ``prefix`` is used as a regular-expression fragment.
    The stripped text after the colon of the first matching line is
    returned, even when it is empty. Returns '' when no line matches.
    """
    name_pattern = r'(ITEM\d+\.)?' + prefix
    split_lines = (entry.partition(':') for entry in block.splitlines())
    return next(
        (
            value.strip()
            for key, _, value in split_lines
            if re.match(name_pattern, key.strip(), re.I)
        ),
        '',
    )
|
|
|
|
def has_email(block):
    """Return True if the card has at least one EMAIL property with a value.

    ``EMAIL`` is matched at the start of a line, case-insensitively,
    optionally preceded by an ``itemN.`` group and followed by parameters.
    A property whose value is empty or whitespace-only (``EMAIL:``) does not
    count: such a card carries no usable address, and get_emails() ignores
    empty values in the same way.
    """
    # name, optional parameters, the colon, then at least one value character
    pattern = r'^(ITEM\d+\.)?EMAIL\b[^:\r\n]*:[ \t]*\S'
    return bool(re.search(pattern, block, re.I | re.M))
|
|
|
|
def has_phone(block):
    """Return True if the card has at least one TEL property with a value.

    ``TEL`` is matched at the start of a line, case-insensitively,
    optionally preceded by an ``itemN.`` group and followed by parameters.
    A property whose value is empty or whitespace-only (``TEL:``) does not
    count, because such a card carries no usable phone number.
    """
    # name, optional parameters, the colon, then at least one value character
    pattern = r'^(ITEM\d+\.)?TEL\b[^:\r\n]*:[ \t]*\S'
    return bool(re.search(pattern, block, re.I | re.M))
|
|
|
|
def get_name(block):
    """Return the card's name: its FN value, or its ORG value as fallback.

    ORG is consulted only when the FN lookup yields an empty string; the
    result is '' when neither lookup yields a value.
    """
    return get_field_val(block, 'FN') or get_field_val(block, 'ORG')
|
|
|
|
def get_emails(block):
    """Return every non-empty EMAIL value in the card, stripped and lower-cased.

    Lines are split at their first colon; a line counts when the part before
    the colon starts with ``EMAIL`` (case-insensitive, optionally preceded
    by an ``itemN.`` group) and the part after it is not blank. Values are
    returned in the order they appear.
    """
    split_lines = (entry.partition(':') for entry in block.splitlines())
    return [
        value.strip().lower()
        for key, _, value in split_lines
        if value.strip() and re.match(r'(ITEM\d+\.)?EMAIL', key.strip(), re.I)
    ]
|
|
|
|
# ── Dutch phone normaliser ─────────────────────────────────────────────────────
|
|
|
|
def nl_to_intl(phone):
    """Convert a Dutch national phone number to international +31 form.

    Rules:
      - Already international (leading ``+`` or ``00``): returned unchanged.
      - Anything that is not exactly 10 digits starting with ``0`` once the
        formatting is stripped (for example a 10-digit US number): returned
        unchanged.
      - 10-digit numbers starting with ``0``: the trunk ``0`` is dropped and
        ``+31`` is put in front. Separators in the rest of the number are
        kept exactly as they were.

    The trunk zero may be preceded by an opening parenthesis or spaces, as
    in ``(020) 1234567``. Any other leading text (a ``tel:`` scheme, a stray
    dash) makes the value ambiguous, so it is returned unchanged rather than
    rewritten into something invalid.

    Args:
        phone: Phone number text as found in a TEL value.

    Returns:
        The input with surrounding whitespace removed, converted when it is
        recognised as a Dutch national number.
    """
    p = phone.strip()

    # Already international
    if p.startswith(('+', '00')):
        return p

    # Strip formatting to count digits
    digits_only = re.sub(r'\D', '', p)

    # Must start with 0 and be 10 digits to be Dutch local
    if not digits_only.startswith('0') or len(digits_only) != 10:
        return p

    # Locate the trunk zero itself instead of assuming it is the first
    # character: slicing off p[0] would drop a "(" and keep the zero.
    trunk = re.match(r'([(\s]*)0(.*)', p, re.S)
    if trunk is None:
        return p

    # e.g. "06 23 123 456" → "+316 23 123 456"
    # e.g. "0646438755"    → "+31646438755"
    # e.g. "020-1234567"   → "+3120-1234567"   (Amsterdam landline)
    # e.g. "(020) 1234567" → "+31(20) 1234567"
    return '+31' + trunk.group(1) + trunk.group(2)
|
|
|
|
def convert_phones_in_block(block):
    """Return the card with Dutch national TEL values rewritten to +31 form.

    Every line whose property name starts with ``TEL`` (case-insensitive,
    optionally preceded by an ``itemN.`` group) and whose value is not blank
    has that value passed through nl_to_intl(). A line is rebuilt only when
    the value actually changes; all other lines are passed through as they
    are. Lines are joined with ``\\n`` in the result.

    Args:
        block: Text of one vCard.

    Returns:
        The card text with converted TEL values. CR and CRLF line endings
        are normalised to LF.
    """
    lines = []
    # Split on real line endings only. str.splitlines() also breaks on
    # characters such as U+2028, form feed and vertical tab, which would cut
    # a NOTE containing one of them into two physical lines and make an
    # untouched card look modified.
    for line in re.split(r'\r\n|\r|\n', block):
        k, _, v = line.partition(':')
        value = v.strip()
        if value and re.match(r'(ITEM\d+\.)?TEL', k.strip(), re.I):
            new_v = nl_to_intl(value)
            if new_v != value:
                line = f'{k}:{new_v}'
        lines.append(line)
    return '\n'.join(lines)
|
|
|
|
# ── main ──────────────────────────────────────────────────────────────────────
|
|
|
|
def main():
    """Filter and normalise the contacts in INPUT and write them to OUTPUT.

    For every card read from INPUT:
      1. drop it when it has neither an email nor a phone entry;
      2. drop it when it has no name (FN or ORG), remembering its email
         addresses so they can be listed for review;
      3. rewrite Dutch national phone numbers to +31 form.

    The surviving cards are written to OUTPUT, each followed by a blank
    line, and a summary is printed to stdout. When nameless contacts were
    dropped, their distinct email addresses are printed in sorted order.
    """
    blocks = parse_blocks(INPUT)
    print(f"Input: {len(blocks)} contacts")

    kept = []
    removed_no_contact = 0
    removed_no_name_emails = []  # emails of nameless contacts
    phones_converted = 0

    for block in blocks:
        # Step 1: must have email or phone
        if not (has_email(block) or has_phone(block)):
            removed_no_contact += 1
            continue

        # Step 2: must have a name
        name = get_name(block)
        if not name or not name.strip():
            emails = get_emails(block)
            removed_no_name_emails.extend(emails or ['(no email — phone only)'])
            continue

        # Step 3: Dutch phone normalisation
        new_block = convert_phones_in_block(block)
        if new_block != block:
            # Count rewritten lines rather than cards, so the summary really
            # reports phone numbers as its label says.
            phones_converted += sum(
                old != new
                for old, new in zip(block.splitlines(), new_block.splitlines())
            )
            block = new_block

        kept.append(block)

    print(f"Removed (no email+phone): {removed_no_contact}")
    print(f"Removed (no name): {len(removed_no_name_emails)} email(s) from nameless contacts")
    print(f"Phone numbers converted: {phones_converted}")
    print(f"Output: {len(kept)} contacts")

    # Write final vcf
    with open(OUTPUT, 'w', encoding='utf-8') as f:
        for block in kept:
            # parse_blocks() accepts the markers in any letter case, so they
            # are tested the same way here; a case-sensitive test would wrap
            # a lower-case card in a second BEGIN/END pair.
            upper = block.upper()
            if not upper.startswith('BEGIN:VCARD'):
                block = 'BEGIN:VCARD\nVERSION:3.0\n' + block
            if not upper.endswith('END:VCARD'):
                block = block + '\nEND:VCARD'
            f.write(block + '\n\n')

    print(f"\nWritten to: {OUTPUT}")

    if removed_no_name_emails:
        print(f"\n── Nameless contacts (emails for review) ──────────────────")
        for e in sorted(set(removed_no_name_emails)):
            print(f" {e}")
|
|
|
|
# Run the clean-up only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|