chore: auto-commit uncommitted changes
This commit is contained in:
parent
19676f75fb
commit
8cdd003f94
|
|
@ -1,5 +1,17 @@
|
|||
# 2026-02-17
|
||||
|
||||
## Password Manager Migration: Proton Pass → Vaultwarden
|
||||
- Leaving Proton ecosystem (mail → Stalwart) means losing Proton Pass too
|
||||
- **Decision: migrate to Vaultwarden** (already running at vault.inou.com)
|
||||
- **Steps:**
|
||||
1. Johan creates account at https://vault.inou.com
|
||||
2. Export from Proton Pass (Settings → Export → CSV or JSON)
|
||||
3. Import into Vaultwarden (compatible with Bitwarden import format)
|
||||
4. Disable signups after account created (set SIGNUPS_ALLOWED=false in docker-compose)
|
||||
5. Install Bitwarden app on iPhone, Mac, browser extension — point server to https://vault.inou.com
|
||||
- **TODO:** Set up rclone backup to GDrive (needs OAuth browser auth on Zurich)
|
||||
- Fish Audio API key and other secrets should move here from plaintext files
|
||||
|
||||
## Domain Registrar Migration Plan
|
||||
- OpenProvider has ~€80 credits remaining
|
||||
- As domains come up for renewal, transfer to Cloudflare Registrar instead
|
||||
|
|
@ -67,3 +79,6 @@
|
|||
- **Test with curl before deploying** — always get "curl proof" before pushing code changes
|
||||
- **Fireworks guarantees privacy; Grok (xAI) does not** — use Fireworks for anything touching private data (emails, Teams). Grok OK for public news scanning.
|
||||
- **Claude Sonnet 4.6 released today** — 1M context (beta), adaptive thinking, context compaction (beta), $3/$15 per M tokens
|
||||
|
||||
## Sophia
|
||||
- Blood draw at 12:00 PM, Health Link, 851 Brightwater Blvd NE, $65 (Karina)
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -1,9 +1,9 @@
|
|||
{
|
||||
"last_updated": "2026-02-17T23:00:03.380113Z",
|
||||
"last_updated": "2026-02-18T05:00:04.947668Z",
|
||||
"source": "api",
|
||||
"session_percent": 54,
|
||||
"session_resets": "2026-02-18T00:00:00.351744+00:00",
|
||||
"weekly_percent": 53,
|
||||
"weekly_resets": "2026-02-21T19:00:00.351762+00:00",
|
||||
"sonnet_percent": 7
|
||||
"session_percent": 0,
|
||||
"session_resets": null,
|
||||
"weekly_percent": 55,
|
||||
"weekly_resets": "2026-02-21T18:59:59.902276+00:00",
|
||||
"sonnet_percent": 10
|
||||
}
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"lastChecks": {
|
||||
"email": 1739606500,
|
||||
"email": 1771380446,
|
||||
"calendar": null,
|
||||
"weather": 1771163041,
|
||||
"briefing": 1771163041,
|
||||
|
|
|
|||
|
|
@ -1,137 +1,38 @@
|
|||
{
|
||||
"date": "2026-02-17",
|
||||
"timestamp": "2026-02-17T09:00:50-05:00",
|
||||
"openclaw": {
|
||||
"before": "2026.2.15",
|
||||
"latest": "2026.2.15",
|
||||
"updated": false
|
||||
"time": "21:00 ET",
|
||||
"routine": "nightly-maintenance",
|
||||
"results": {
|
||||
"memory": {
|
||||
"working_context": "updated",
|
||||
"daily_note": "updated (2026-02-17.md)"
|
||||
},
|
||||
"os_updates": {
|
||||
"status": "success",
|
||||
"packages_upgraded": 25,
|
||||
"kernel_note": "New kernel available — system reboot recommended",
|
||||
"kernel_expected": "6.8.0-100-generic"
|
||||
},
|
||||
"claude_code": {
|
||||
"before": "2.1.44",
|
||||
"latest": "2.1.44",
|
||||
"updated": false
|
||||
"status": "updated",
|
||||
"from": "2.1.42",
|
||||
"to": "2.1.45",
|
||||
"note": "Both /usr/bin/claude (system) and ~/.npm-global/bin/claude updated to 2.1.45"
|
||||
},
|
||||
"os": {
|
||||
"available": 23,
|
||||
"packages": [
|
||||
{
|
||||
"name": "cpp-13-x86-64-linux-gnu",
|
||||
"from": "13.3.0-6ubuntu2~24.04",
|
||||
"to": "13.3.0-6ubuntu2~24.04.1"
|
||||
"openclaw": {
|
||||
"status": "up_to_date",
|
||||
"version": "2026.2.15"
|
||||
},
|
||||
{
|
||||
"name": "cpp-13",
|
||||
"from": "13.3.0-6ubuntu2~24.04",
|
||||
"to": "13.3.0-6ubuntu2~24.04.1"
|
||||
"inou_mcp_bundle": {
|
||||
"status": "check_skipped",
|
||||
"current_version": "1.6.1",
|
||||
"note": "Download URL not available (https://inou.com/download/inou.mcpb returned non-200)"
|
||||
},
|
||||
{
|
||||
"name": "g++-13-x86-64-linux-gnu",
|
||||
"from": "13.3.0-6ubuntu2~24.04",
|
||||
"to": "13.3.0-6ubuntu2~24.04.1"
|
||||
},
|
||||
{
|
||||
"name": "g++-13",
|
||||
"from": "13.3.0-6ubuntu2~24.04",
|
||||
"to": "13.3.0-6ubuntu2~24.04.1"
|
||||
},
|
||||
{
|
||||
"name": "gcc-13-base",
|
||||
"from": "13.3.0-6ubuntu2~24.04",
|
||||
"to": "13.3.0-6ubuntu2~24.04.1"
|
||||
},
|
||||
{
|
||||
"name": "gcc-13-x86-64-linux-gnu",
|
||||
"from": "13.3.0-6ubuntu2~24.04",
|
||||
"to": "13.3.0-6ubuntu2~24.04.1"
|
||||
},
|
||||
{
|
||||
"name": "gcc-13",
|
||||
"from": "13.3.0-6ubuntu2~24.04",
|
||||
"to": "13.3.0-6ubuntu2~24.04.1"
|
||||
},
|
||||
{
|
||||
"name": "gcc-14-base",
|
||||
"from": "14.2.0-4ubuntu2~24.04",
|
||||
"to": "14.2.0-4ubuntu2~24.04.1"
|
||||
},
|
||||
{
|
||||
"name": "libasan8",
|
||||
"from": "14.2.0-4ubuntu2~24.04",
|
||||
"to": "14.2.0-4ubuntu2~24.04.1"
|
||||
},
|
||||
{
|
||||
"name": "libatomic1",
|
||||
"from": "14.2.0-4ubuntu2~24.04",
|
||||
"to": "14.2.0-4ubuntu2~24.04.1"
|
||||
},
|
||||
{
|
||||
"name": "libcc1-0",
|
||||
"from": "14.2.0-4ubuntu2~24.04",
|
||||
"to": "14.2.0-4ubuntu2~24.04.1"
|
||||
},
|
||||
{
|
||||
"name": "libgcc-13-dev",
|
||||
"from": "13.3.0-6ubuntu2~24.04",
|
||||
"to": "13.3.0-6ubuntu2~24.04.1"
|
||||
},
|
||||
{
|
||||
"name": "libgcc-s1",
|
||||
"from": "14.2.0-4ubuntu2~24.04",
|
||||
"to": "14.2.0-4ubuntu2~24.04.1"
|
||||
},
|
||||
{
|
||||
"name": "libgfortran5",
|
||||
"from": "14.2.0-4ubuntu2~24.04",
|
||||
"to": "14.2.0-4ubuntu2~24.04.1"
|
||||
},
|
||||
{
|
||||
"name": "libgomp1",
|
||||
"from": "14.2.0-4ubuntu2~24.04",
|
||||
"to": "14.2.0-4ubuntu2~24.04.1"
|
||||
},
|
||||
{
|
||||
"name": "libhwasan0",
|
||||
"from": "14.2.0-4ubuntu2~24.04",
|
||||
"to": "14.2.0-4ubuntu2~24.04.1"
|
||||
},
|
||||
{
|
||||
"name": "libitm1",
|
||||
"from": "14.2.0-4ubuntu2~24.04",
|
||||
"to": "14.2.0-4ubuntu2~24.04.1"
|
||||
},
|
||||
{
|
||||
"name": "liblsan0",
|
||||
"from": "14.2.0-4ubuntu2~24.04",
|
||||
"to": "14.2.0-4ubuntu2~24.04.1"
|
||||
},
|
||||
{
|
||||
"name": "libquadmath0",
|
||||
"from": "14.2.0-4ubuntu2~24.04",
|
||||
"to": "14.2.0-4ubuntu2~24.04.1"
|
||||
},
|
||||
{
|
||||
"name": "libstdc++-13-dev",
|
||||
"from": "13.3.0-6ubuntu2~24.04",
|
||||
"to": "13.3.0-6ubuntu2~24.04.1"
|
||||
},
|
||||
{
|
||||
"name": "libstdc++6",
|
||||
"from": "14.2.0-4ubuntu2~24.04",
|
||||
"to": "14.2.0-4ubuntu2~24.04.1"
|
||||
},
|
||||
{
|
||||
"name": "libtsan2",
|
||||
"from": "14.2.0-4ubuntu2~24.04",
|
||||
"to": "14.2.0-4ubuntu2~24.04.1"
|
||||
},
|
||||
{
|
||||
"name": "libubsan1",
|
||||
"from": "14.2.0-4ubuntu2~24.04",
|
||||
"to": "14.2.0-4ubuntu2~24.04.1"
|
||||
"session_cleanup": {
|
||||
"status": "success",
|
||||
"orphaned_jsonl_deleted": 212,
|
||||
"cron_run_keys_removed": 11,
|
||||
"sessions_json_remaining_keys": 37
|
||||
}
|
||||
}
|
||||
],
|
||||
"updated": true,
|
||||
"reboot_required": true
|
||||
},
|
||||
"gateway_restarted": false
|
||||
}
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
# Working Context (updated 2026-02-17 3:56 PM ET)
|
||||
# Working Context (updated 2026-02-17 9:00 PM ET)
|
||||
|
||||
## Active Projects
|
||||
|
||||
|
|
@ -19,32 +19,95 @@
|
|||
- Vision: contextual voice announcements (deliveries, appliances, Russian for in-laws)
|
||||
- Need Tanya approval before expanding beyond office
|
||||
- **Next feature:** Voice meeting announcements — pre-generate TTS at calendar fetch, play at T-5m
|
||||
- **Architecture decision:** Alert dashboard will merge INTO Message Center (not now, but planned). MC becomes the single brain: calendar → TTS → alerts → visual display. For now, build voice into alert-dashboard server.
|
||||
- **Architecture decision:** Alert dashboard will merge INTO Message Center (not now, but planned)
|
||||
|
||||
### Message Center (MC) — M365 Pipeline
|
||||
- **Working:** MC detects emails/Teams → K2.5 summarizes → POSTs to Fully dashboard
|
||||
- **K2.5 completions API** is the correct endpoint (chat API broken for formatting)
|
||||
- **Conversation-aware**: Johan's replies auto-clear Fully alerts for that Teams conversation
|
||||
- **Group coloring**: Teams messages from same conv share background color (hue from conv ID hash)
|
||||
- **Fireworks API key:** `fw_RVcDe4c6mN4utKLsgA7hTm` (working key — old stale key caused issues)
|
||||
- **OC m365 hook removed**: MC handles everything directly now
|
||||
- Previously reported M365 fetch error (ErrorInvalidUrlQueryFilter ~2/16) — appears resolved
|
||||
|
||||
### Fully Dashboard (Forge / Tablets)
|
||||
- **Budget pace indicator:** `usage% / time%` (week = Sat 2pm → Sat 2pm ET)
|
||||
- >100% = red, 90-100% = white, <90% = green. Shows as `⚡ 108%`
|
||||
- **News streams:** Topics (markets, ai, nabl, news, netherlands) each get own bar below alerts
|
||||
- Fetches from James dashboard /api/news, grouped by `topic` field
|
||||
- **Claude usage fixed**: pulls object not array from dashboard API
|
||||
|
||||
### News System
|
||||
- **Cron:** Grok 4.1 Fast every 4h (2,6,10,14,18,22 ET) for news scan
|
||||
- **Philosophy:** X/Twitter as radar → then PRIMARY SOURCE. No middlemen.
|
||||
- `topic` field added to James dashboard news API; Topics section groups by topic with emoji headers
|
||||
|
||||
### Password Manager Migration: Proton Pass → Vaultwarden
|
||||
- **Status:** Plan defined, awaiting Johan action
|
||||
- **Steps:**
|
||||
1. Johan creates account at https://vault.inou.com
|
||||
2. Export from Proton Pass (Settings → Export → CSV or JSON)
|
||||
3. Import into Vaultwarden (Bitwarden-compatible format)
|
||||
4. Set SIGNUPS_ALLOWED=false after account created
|
||||
5. Install Bitwarden app on iPhone, Mac, browser extension → point to https://vault.inou.com
|
||||
- **TODO:** rclone backup to GDrive (needs browser OAuth on Zurich)
|
||||
- Fish Audio API key + other secrets → Vaultwarden
|
||||
|
||||
### Domain Registrar Migration
|
||||
- **URGENT: jongsma.me expires 2026-02-28** — transfer to Cloudflare before auto-renewal
|
||||
- stpetersburgaquatics.com expires 2026-03-13
|
||||
- OpenProvider has ~€80 credits; migrate renewals to Cloudflare going forward
|
||||
- Full renewal schedule: zavtra(May), inou(Jun), unbelievable(Jul), x4(Aug), e-consultants(Sep), muskepo.nl(Sep), busel(Oct), localbackup(Oct), johanjongsma(Nov), 851brightwaters(Dec), flourishevents(Jan27), muskepo.com(Jan27), harryhaasjes(Feb27)
|
||||
|
||||
### DNS Migration
|
||||
- Changed NS to Cloudflare for ALL 10 remaining domains via OpenProvider API
|
||||
- Cloudflare activation checks triggered; cron job at 4:55 AM to check + add remaining 6
|
||||
- OpenProvider API creds: `~/.config/openprovider.env`
|
||||
|
||||
### Email Migration (Stalwart)
|
||||
- Tanya added to Proton Bridge (account #2, pw: dxk6YlYpRgr3Z7fw3BhXDQ)
|
||||
- Stalwart: all users recreated with `"roles":["user"]` — needed for IMAP auth
|
||||
- imapsync working via SSH tunnel (localhost:9930 → zurich:993)
|
||||
- Tanya sync was running in background (/tmp/imapsync_tanya.log) — check status
|
||||
- Self-signed cert added but Stalwart ACME override issue still pending
|
||||
|
||||
### James Dashboard
|
||||
- Running on port 9200 as dealroom.service
|
||||
- Agent chat still needs testing from Johan's Mac browser
|
||||
|
||||
### Message Center — Broken
|
||||
- M365 email fetch: ErrorInvalidUrlQueryFilter (broken since ~2/16 14:58)
|
||||
- LLM triage API key invalid (401)
|
||||
- Both need config fixes
|
||||
### BlueBubbles / iMessage
|
||||
- Mac Mini M4 (2025) is Johan's daily driver — can run BlueBubbles now
|
||||
- Setup deferred
|
||||
|
||||
## Key Context
|
||||
- **House showing prep** worked well today — HA bulk ops via K2.5 subagent (new rule in AGENTS.md)
|
||||
- **4000K** = correct color temp for the house (not 6500K)
|
||||
- **Wake permission:** 8 AM+ ET, genuinely important events only
|
||||
- **Briefings:** Johan prefers X/Twitter as primary news source
|
||||
- **OpenClaw patches:** Two source patches need reapplication after updates (scope preservation + deleted transcript indexing) — see 2026-02-16.md
|
||||
- **OpenClaw patches:** Two source patches need reapplication after updates:
|
||||
- Scope preservation patch
|
||||
- Deleted transcript indexing patch
|
||||
- See 2026-02-16.md for details
|
||||
- **Fireworks for private data** (emails, Teams); Grok OK for public news
|
||||
- **Claude Sonnet 4.6 released** — 1M context (beta), adaptive thinking, context compaction (beta), $3/$15 per M tokens
|
||||
|
||||
## Upcoming
|
||||
- **Sophia blood draw** — Tue Feb 17 at 12:00 PM, Health Link, 851 Brightwater Blvd NE, $65 (Karina)
|
||||
- Fix Message Center M365 + LLM triage
|
||||
- Build persistent TTS service
|
||||
- Test xAI/Grok in morning briefing
|
||||
## Upcoming / Open Threads
|
||||
- **jongsma.me domain transfer** (URGENT — expires 2026-02-28)
|
||||
- **Vaultwarden setup** — Johan needs to create account + import passwords
|
||||
- **rclone backup** for Vaultwarden (needs browser OAuth on Zurich)
|
||||
- **Persistent TTS service** on forge
|
||||
- **BlueBubbles setup** on Mac Mini M4
|
||||
- **Test xAI/Grok** in morning briefing
|
||||
- **Sophia blood draw** — was today (Tue Feb 17 12:00 PM), Health Link
|
||||
- Fix Stalwart ACME cert issue
|
||||
- Matrix OS — watch only, revisit when mature
|
||||
|
||||
## People
|
||||
- **Misha (Michael Muskepo)** — Johan's son, Dealspace AI co-creator with PE guys
|
||||
- **Tanya (Tatyana)** — Johan's wife, gatekeeper for smart home expansion
|
||||
- **Sophia** — daughter, blood draw tomorrow
|
||||
- **Sophia** — daughter (blood draw was today)
|
||||
- **Karina** — associated with Sophia's health link appointment ($65)
|
||||
|
||||
## Corrections Learned Today
|
||||
- **"Best over fast, always"** — Johan doesn't want fastest approach, wants best
|
||||
- **Don't bypass root cause** — removing LLM summarization was lazy; fix the prompt instead
|
||||
- **Test with curl before deploying** — always get "curl proof" before pushing code changes
|
||||
- **K2.5 chat API broken for formatting** — use completions API with few-shot pattern instead
|
||||
|
|
|
|||
Binary file not shown.
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,15 @@
|
|||
BEGIN:VCARD
|
||||
VERSION:3.0
|
||||
FN:Jeff Brewster
|
||||
N:Brewster;Jeff;;;
|
||||
TEL;TYPE=CELL:+1917-558-6320
|
||||
NOTE:DevOps
|
||||
CATEGORIES:myContacts
|
||||
END:VCARD
|
||||
BEGIN:VCARD
|
||||
VERSION:3.0
|
||||
FN:Omegaji
|
||||
N:;Omegaji;;;
|
||||
TEL;TYPE=CELL:+918849428284
|
||||
CATEGORIES:myContacts
|
||||
END:VCARD
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,248 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
vCard deduplicator — conservative, correct.
|
||||
|
||||
Rules (in order):
|
||||
1. Same exact email address → same person
|
||||
2. Same phone (appears ≤2 times total — personal phones only) → same person
|
||||
3. Same normalized full name (≥2 significant words) → same person
|
||||
|
||||
Post-merge guard:
|
||||
If a merged record would have >4 different email domains → bad chain merge.
|
||||
Revert: keep each original record separately.
|
||||
"""
|
||||
|
||||
import glob, re
|
||||
from collections import defaultdict
|
||||
|
||||
INPUT_DIR = "/home/johan/clawd/tmp/contacts"
|
||||
OUTPUT = "/home/johan/clawd/tmp/contacts/merged.vcf"
|
||||
|
||||
# ── parser ────────────────────────────────────────────────────────────────────
|
||||
|
||||
def parse_vcf(path):
    """Parse a .vcf file into a list of {FIELD: [values]} dicts.

    Folded continuation lines (newline + space/tab) are unfolded first;
    BEGIN/END/VERSION envelope lines are dropped.
    """
    with open(path, encoding='utf-8', errors='replace') as fh:
        unfolded = re.sub(r'\r?\n[ \t]', '', fh.read())

    result = []
    for chunk in re.split(r'(?=BEGIN:VCARD)', unfolded, flags=re.I):
        if not chunk.strip().upper().startswith('BEGIN:VCARD'):
            continue
        fields = defaultdict(list)
        for row in chunk.splitlines():
            if ':' not in row:
                continue
            key, _, value = row.partition(':')
            key = key.strip().upper()
            value = value.strip()
            if key in ('BEGIN', 'END', 'VERSION'):
                continue
            fields[key].append(value)
        result.append(dict(fields))
    return result
|
||||
|
||||
# ── field helpers ─────────────────────────────────────────────────────────────
|
||||
|
||||
def get_field(card, prefix):
    """Return the first value whose key matches *prefix*.

    A key matches when it equals the prefix, optionally preceded by an
    ITEMn. grouping label and/or followed by ';' parameters — so 'N'
    matches 'N' and 'ITEM1.N;X' but not 'NOTE'. Returns '' when absent.
    """
    key_pat = re.compile(r'(ITEM\d+\.)?' + prefix + r'($|[;:])', re.I)
    for key, values in card.items():
        if key_pat.match(key):
            return values[0] if values else ''
    return ''
|
||||
|
||||
def get_emails(card):
    """Collect all lowercased email addresses from EMAIL-family fields.

    Values without an '@' are ignored. Returns a set.
    """
    key_pat = re.compile(r'(ITEM\d+\.)?EMAIL', re.I)
    found = set()
    for key, values in card.items():
        if not key_pat.match(key):
            continue
        found.update(v.strip().lower() for v in values if v and '@' in v)
    return found
|
||||
|
||||
def get_phones(card):
    """Map normalized phone → original string for all TEL-family fields.

    Normalization keeps only digits, then the last 9 of them (enough to
    match a number regardless of country-code/prefix formatting).
    """
    tel_pat = re.compile(r'(ITEM\d+\.)?TEL', re.I)
    phones = {}
    for key, values in card.items():
        if not tel_pat.match(key):
            continue
        for raw in values:
            if not raw:
                continue
            digits = re.sub(r'\D', '', raw)
            norm = digits[-9:] if len(digits) >= 9 else digits
            if norm:
                phones[norm] = raw.strip()
    return phones
|
||||
|
||||
def get_name(card):
    """Display name for a card: FN, falling back to ORG, else ''."""
    return get_field(card, 'FN').strip() or get_field(card, 'ORG').strip()
|
||||
|
||||
def normalize_name(name):
    """Lowercase, collapse whitespace, and sort words.

    Word-sorting makes matching order-independent ("Ann Lee" == "Lee Ann").
    """
    collapsed = re.sub(r'\s+', ' ', name.strip().lower())
    return ' '.join(sorted(collapsed.split()))
|
||||
|
||||
def completeness(card):
    """Count non-empty values across all fields — proxy for record richness."""
    total = 0
    for values in card.values():
        total += sum(1 for v in values if v)
    return total
|
||||
|
||||
def email_domains(emails):
    """Distinct domain parts (text after the first '@') of the addresses."""
    return {addr.split('@')[1] for addr in emails if '@' in addr}
|
||||
|
||||
# ── merge ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
NOISE_CATS = re.compile(
|
||||
r'^(imported on .+|restored from google.*|mycontacts)$', re.I)
|
||||
|
||||
def merge_cards(cards):
    """Merge a group of duplicate cards into one record.

    The most complete card is the base; emails, phones and categories
    are unioned across the whole group and rebuilt. Emails become
    ITEMn.EMAIL keys; phones become TEL_PHONE_n placeholder keys that
    serialize() renames to TEL;TYPE=CELL at output time.
    """
    # Base record = richest card (most non-empty values).
    base = max(cards, key=completeness)

    # Union contact details across the whole group.
    all_emails = set()
    all_phones = {}
    all_cats = []
    for c in cards:
        all_emails |= get_emails(c)
        all_phones.update(get_phones(c))
        for k, vs in c.items():
            if k.upper() == 'CATEGORIES':
                for v in vs:
                    all_cats.extend(x.strip() for x in v.split(','))
    # De-dupe categories preserving first-seen order; drop auto-generated
    # noise labels (see NOISE_CATS).
    all_cats = list(dict.fromkeys(
        c for c in all_cats if not NOISE_CATS.match(c)))

    # Copy every base field EXCEPT the three families rebuilt below.
    out = {}
    skip = ('EMAIL', 'TEL', 'CATEGORIES')
    for k, vs in base.items():
        if any(re.match(r'(ITEM\d+\.)' + p, k, re.I) for p in skip):
            continue
        out[k] = list(vs)

    # Rebuild emails in sorted order as ITEMn.EMAIL keys.
    for i, email in enumerate(sorted(all_emails)):
        out[f'ITEM{i+1}.EMAIL;TYPE=INTERNET'] = [email]

    # Rebuild phones under placeholder keys (unique per index — a plain
    # 'TEL' key could not hold multiple entries in this dict shape).
    for i, (norm, orig) in enumerate(all_phones.items()):
        out[f'TEL_PHONE_{i}'] = [orig]

    if all_cats:
        out['CATEGORIES'] = [','.join(all_cats)]

    return out
|
||||
|
||||
def serialize(card):
    """Render a parsed card dict back to a vCard 3.0 text block.

    Fields are emitted in a conventional priority order (FN first, then
    N, ORG, …). Fix: keys carrying an ITEMn. grouping prefix (e.g. the
    ITEM1.EMAIL keys produced by merge_cards) previously never matched
    their priority entry — `startswith` saw 'ITEM' — so emails sank to
    the end of the card. The prefix is now stripped before ordering.
    """
    lines = ['BEGIN:VCARD', 'VERSION:3.0']
    priority = ['FN','N','ORG','TITLE','EMAIL','TEL','ADR','URL',
                'NOTE','BDAY','PHOTO','CATEGORIES','X-']
    def key_order(k):
        # Ignore the ITEMn. grouping label when ranking a key.
        ku = re.sub(r'^ITEM\d+\.', '', k.upper())
        for i, p in enumerate(priority):
            if ku.startswith(p): return i
        return 99  # unknown fields go last (sorted() is stable)
    for k in sorted(card.keys(), key=key_order):
        # TEL_PHONE_n is an internal placeholder from merge_cards.
        display = re.sub(r'^TEL_PHONE_\d+$', 'TEL;TYPE=CELL', k)
        for v in card[k]:
            if v:
                lines.append(f'{display}:{v}')
    lines.append('END:VCARD')
    return '\n'.join(lines)
|
||||
|
||||
# ── union-find ────────────────────────────────────────────────────────────────
|
||||
|
||||
def make_uf(n):
    """Union-find over range(n); returns (find, union) closures.

    find uses path halving; union links root-to-root (no rank heuristic,
    which is fine at this scale).
    """
    parent = list(range(n))

    def find(i):
        while parent[i] != i:
            parent[i] = parent[parent[i]]  # path halving
            i = parent[i]
        return i

    def union(a, b):
        root_a = find(a)
        root_b = find(b)
        if root_a != root_b:
            parent[root_b] = root_a

    return find, union
|
||||
|
||||
# ── main ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
def main():
    """Dedup pipeline: load all .vcf inputs → union-find on three match
    rules → merge each group (with a chain-merge guard) → write OUTPUT.
    """
    files = sorted(glob.glob(f'{INPUT_DIR}/*.vcf'))
    # Skip our own previous outputs so reruns don't re-ingest them.
    files = [f for f in files
             if not any(x in f for x in ('merged','final','dedup','postprocess'))]

    all_cards = []
    for f in files:
        cards = parse_vcf(f)
        print(f" {f.split('/')[-1]}: {len(cards)}")
        all_cards.extend(cards)
    n = len(all_cards)
    print(f"Total: {n}")

    find, union = make_uf(n)

    # ── Rule 1: same email address → same person ──────────────────────
    # email_map remembers the first card index seen for each address.
    email_map = {}
    for i, c in enumerate(all_cards):
        for e in get_emails(c):
            if e in email_map:
                union(i, email_map[e])
            else:
                email_map[e] = i

    # ── Rule 2: same phone (personal only — skip phones in 3+ contacts) ─
    phone_freq = defaultdict(int)
    for c in all_cards:
        for norm in get_phones(c):
            phone_freq[norm] += 1

    phone_map = {}
    for i, c in enumerate(all_cards):
        for norm, orig in get_phones(c).items():
            if phone_freq[norm] >= 3:
                continue  # shared/switchboard — skip
            if norm in phone_map:
                union(i, phone_map[norm])
            else:
                phone_map[norm] = i

    # ── Rule 3: exact full name (≥2 significant words) ────────────────
    name_map = {}
    for i, c in enumerate(all_cards):
        name = get_name(c)
        # "significant" = words longer than 2 characters
        sig = [w for w in name.split() if len(w) > 2]
        if len(sig) < 2:
            continue  # single word / too short — skip
        key = normalize_name(name)
        if key in name_map:
            union(i, name_map[key])
        else:
            name_map[key] = i

    # ── Group cards by union-find root ────────────────────────────────
    groups = defaultdict(list)
    for i, c in enumerate(all_cards):
        groups[find(i)].append(c)

    # ── Merge + post-merge guard ──────────────────────────────────────
    MAX_DOMAINS = 4  # >4 email domains = chain merge gone wrong → revert

    result = []
    reverted = 0
    dup_removed = 0

    for root, grp in groups.items():
        if len(grp) == 1:
            result.append(grp[0])
            continue

        merged = merge_cards(grp)
        domains = email_domains(get_emails(merged))

        if len(domains) > MAX_DOMAINS:
            # Bad chain merge — keep each record individually
            reverted += len(grp) - 1
            for c in grp:
                result.append(c)
        else:
            dup_removed += len(grp) - 1
            result.append(merged)

    print(f"Duplicates removed: {dup_removed}")
    print(f"Reverted (chain merges): {reverted} groups → individual records")
    print(f"Output: {len(result)}")

    with open(OUTPUT, 'w', encoding='utf-8') as f:
        f.write('\n\n'.join(serialize(c) for c in result) + '\n')
    print(f"Written: {OUTPUT}")
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,155 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Post-process merged.vcf:
|
||||
1. Remove contacts with no email AND no phone
|
||||
2. Remove contacts with no name (FN or ORG) — list their emails for review
|
||||
3. Convert Dutch phone numbers (0X...) to international format (+31...)
|
||||
"""
|
||||
|
||||
import re, sys
|
||||
|
||||
INPUT = "/home/johan/clawd/tmp/contacts/merged.vcf"
|
||||
OUTPUT = "/home/johan/clawd/tmp/contacts/final.vcf"
|
||||
|
||||
# ── vCard parser (simple block-based) ─────────────────────────────────────────
|
||||
|
||||
def parse_blocks(path):
    """Read a .vcf file and return its raw BEGIN:VCARD…END:VCARD blocks.

    Folded continuation lines (newline + space/tab) are unfolded first;
    each returned block is the stripped text of one card.
    """
    with open(path, encoding='utf-8', errors='replace') as fh:
        unfolded = re.sub(r'\r?\n[ \t]', '', fh.read())
    out = []
    for chunk in re.split(r'(?=BEGIN:VCARD)', unfolded, flags=re.I):
        chunk = chunk.strip()
        if chunk.upper().startswith('BEGIN:VCARD'):
            out.append(chunk)
    return out
|
||||
|
||||
def block_lines(block):
    """Content lines of a block, minus blanks and envelope lines.

    Envelope = BEGIN:VCARD / END:VCARD / VERSION:3.0 (case-insensitive).
    """
    envelope = ('BEGIN:VCARD', 'END:VCARD', 'VERSION:3.0')
    return [line for line in block.splitlines()
            if line.strip() and line.upper() not in envelope]
|
||||
|
||||
def get_field_val(block, prefix):
    """First value in *block* whose key starts with *prefix*.

    An optional ITEMn. grouping label before the prefix is allowed.
    Returns '' when no line matches.
    """
    key_pat = re.compile(r'(ITEM\d+\.)?' + prefix, re.I)
    for line in block.splitlines():
        key, _, val = line.partition(':')
        if key_pat.match(key.strip()):
            return val.strip()
    return ''
|
||||
|
||||
def has_email(block):
    """True when any EMAIL property line (optionally ITEMn.-prefixed) exists."""
    return re.search(r'^(ITEM\d+\.)?EMAIL\b', block, re.I | re.M) is not None
|
||||
|
||||
def has_phone(block):
    """True when any TEL property line (optionally ITEMn.-prefixed) exists."""
    return re.search(r'^(ITEM\d+\.)?TEL\b', block, re.I | re.M) is not None
|
||||
|
||||
def get_name(block):
    """Display name for a block: FN if present, otherwise ORG (may be '')."""
    return get_field_val(block, 'FN') or get_field_val(block, 'ORG')
|
||||
|
||||
def get_emails(block):
    """All lowercased, non-empty email values in the block, in line order."""
    key_pat = re.compile(r'(ITEM\d+\.)?EMAIL', re.I)
    found = []
    for line in block.splitlines():
        key, _, val = line.partition(':')
        val = val.strip()
        if val and key_pat.match(key.strip()):
            found.append(val.lower())
    return found
|
||||
|
||||
# ── Dutch phone normaliser ─────────────────────────────────────────────────────
|
||||
|
||||
def nl_to_intl(phone):
    """Convert a Dutch local number to international (+31) format.

    Rules:
      - Already international ('+XX' or '00XX') → returned unchanged.
      - Not a 10-digit number starting with 0 → returned unchanged
        (this also leaves US-style 10-digit numbers alone).
      - 10-digit Dutch local (leading 0) → the leading 0 is replaced by
        '+31', keeping the rest of the original formatting.
        e.g. "0646438755"  → "+31646438755"
             "06 23 123 456" → "+316 23 123 456"
             "020-1234567" → "+3120-1234567"  (Amsterdam landline)

    Fix: the previous version dropped the first *character* of the
    string rather than the first '0', so a number formatted like
    "(06)46438755" became "+3106)46438755". We now locate and remove
    the actual leading zero.
    """
    p = phone.strip()

    # Already international — leave alone.
    if p.startswith('+') or p.startswith('00'):
        return p

    # Strip formatting to count digits.
    digits_only = re.sub(r'\D', '', p)

    # Must start with 0 and be exactly 10 digits to be Dutch local.
    if not digits_only.startswith('0') or len(digits_only) != 10:
        return p

    # Remove the first '0' in the string (it may be preceded by
    # formatting characters such as '(') and prepend +31.
    zero_at = p.index('0')
    return '+31' + p[:zero_at] + p[zero_at + 1:]
|
||||
|
||||
def convert_phones_in_block(block):
    """Rewrite every TEL value through nl_to_intl; other lines pass through.

    Lines are only touched when the conversion actually changes the value,
    so an unchanged block compares equal to the input.
    """
    tel_pat = re.compile(r'(ITEM\d+\.)?TEL', re.I)
    out = []
    for line in block.splitlines():
        key, _, val = line.partition(':')
        val = val.strip()
        if val and tel_pat.match(key.strip()):
            converted = nl_to_intl(val)
            if converted != val:
                line = f'{key}:{converted}'
        out.append(line)
    return '\n'.join(out)
|
||||
|
||||
# ── main ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
def main():
    """Post-process pipeline: load merged.vcf → drop unusable contacts →
    normalize Dutch phones → write final.vcf and report what was removed.
    """
    blocks = parse_blocks(INPUT)
    print(f"Input: {len(blocks)} contacts")

    kept = []
    removed_no_contact = 0
    removed_no_name_emails = []  # emails of nameless contacts

    phones_converted = 0

    for block in blocks:
        # Step 1: must have email or phone — otherwise unreachable, drop.
        if not has_email(block) and not has_phone(block):
            removed_no_contact += 1
            continue

        # Step 2: must have a name (FN or ORG); collect the dropped
        # contact's emails so they can be reviewed manually.
        name = get_name(block)
        if not name or not name.strip():
            emails = get_emails(block)
            removed_no_name_emails.extend(emails if emails else ['(no email — phone only)'])
            continue

        # Step 3: Dutch phone normalisation (0X… → +31X…).
        new_block = convert_phones_in_block(block)
        if new_block != block:
            phones_converted += 1
            block = new_block

        kept.append(block)

    print(f"Removed (no email+phone): {removed_no_contact}")
    print(f"Removed (no name): {len(removed_no_name_emails)} email(s) from nameless contacts")
    print(f"Phone numbers converted: {phones_converted}")
    print(f"Output: {len(kept)} contacts")

    # Write final vcf — re-add the envelope if a block lost it.
    with open(OUTPUT, 'w', encoding='utf-8') as f:
        for block in kept:
            if not block.startswith('BEGIN:VCARD'):
                block = 'BEGIN:VCARD\nVERSION:3.0\n' + block
            if not block.endswith('END:VCARD'):
                block = block + '\nEND:VCARD'
            f.write(block + '\n\n')

    print(f"\nWritten to: {OUTPUT}")

    # List the emails of dropped nameless contacts for manual review.
    if removed_no_name_emails:
        print(f"\n── Nameless contacts (emails for review) ──────────────────")
        for e in sorted(set(removed_no_name_emails)):
            print(f" {e}")
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
Loading…
Reference in New Issue