chore: auto-commit uncommitted changes
This commit is contained in:
parent
19676f75fb
commit
8cdd003f94
|
|
@ -1,5 +1,17 @@
|
||||||
# 2026-02-17
|
# 2026-02-17
|
||||||
|
|
||||||
|
## Password Manager Migration: Proton Pass → Vaultwarden
|
||||||
|
- Leaving Proton ecosystem (mail → Stalwart) means losing Proton Pass too
|
||||||
|
- **Decision: migrate to Vaultwarden** (already running at vault.inou.com)
|
||||||
|
- **Steps:**
|
||||||
|
1. Johan creates account at https://vault.inou.com
|
||||||
|
2. Export from Proton Pass (Settings → Export → CSV or JSON)
|
||||||
|
3. Import into Vaultwarden (compatible with Bitwarden import format)
|
||||||
|
4. Disable signups after account created (set SIGNUPS_ALLOWED=false in docker-compose)
|
||||||
|
5. Install Bitwarden app on iPhone, Mac, browser extension — point server to https://vault.inou.com
|
||||||
|
- **TODO:** Set up rclone backup to GDrive (needs OAuth browser auth on Zurich)
|
||||||
|
- Fish Audio API key and other secrets should move here from plaintext files
|
||||||
|
|
||||||
## Domain Registrar Migration Plan
|
## Domain Registrar Migration Plan
|
||||||
- OpenProvider has ~€80 credits remaining
|
- OpenProvider has ~€80 credits remaining
|
||||||
- As domains come up for renewal, transfer to Cloudflare Registrar instead
|
- As domains come up for renewal, transfer to Cloudflare Registrar instead
|
||||||
|
|
@ -67,3 +79,6 @@
|
||||||
- **Test with curl before deploying** — always get "curl proof" before pushing code changes
|
- **Test with curl before deploying** — always get "curl proof" before pushing code changes
|
||||||
- **Fireworks guarantees privacy; Grok (xAI) does not** — use Fireworks for anything touching private data (emails, Teams). Grok OK for public news scanning.
|
- **Fireworks guarantees privacy; Grok (xAI) does not** — use Fireworks for anything touching private data (emails, Teams). Grok OK for public news scanning.
|
||||||
- **Claude Sonnet 4.6 released today** — 1M context (beta), adaptive thinking, context compaction (beta), $3/$15 per M tokens
|
- **Claude Sonnet 4.6 released today** — 1M context (beta), adaptive thinking, context compaction (beta), $3/$15 per M tokens
|
||||||
|
|
||||||
|
## Sophia
|
||||||
|
- Blood draw at 12:00 PM, Health Link, 851 Brightwater Blvd NE, $65 (Karina)
|
||||||
|
|
|
||||||
Binary file not shown.
|
|
@ -1,9 +1,9 @@
|
||||||
{
|
{
|
||||||
"last_updated": "2026-02-17T23:00:03.380113Z",
|
"last_updated": "2026-02-18T05:00:04.947668Z",
|
||||||
"source": "api",
|
"source": "api",
|
||||||
"session_percent": 54,
|
"session_percent": 0,
|
||||||
"session_resets": "2026-02-18T00:00:00.351744+00:00",
|
"session_resets": null,
|
||||||
"weekly_percent": 53,
|
"weekly_percent": 55,
|
||||||
"weekly_resets": "2026-02-21T19:00:00.351762+00:00",
|
"weekly_resets": "2026-02-21T18:59:59.902276+00:00",
|
||||||
"sonnet_percent": 7
|
"sonnet_percent": 10
|
||||||
}
|
}
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
{
|
{
|
||||||
"lastChecks": {
|
"lastChecks": {
|
||||||
"email": 1739606500,
|
"email": 1771380446,
|
||||||
"calendar": null,
|
"calendar": null,
|
||||||
"weather": 1771163041,
|
"weather": 1771163041,
|
||||||
"briefing": 1771163041,
|
"briefing": 1771163041,
|
||||||
|
|
|
||||||
|
|
@ -1,137 +1,38 @@
|
||||||
{
|
{
|
||||||
"date": "2026-02-17",
|
"date": "2026-02-17",
|
||||||
"timestamp": "2026-02-17T09:00:50-05:00",
|
"time": "21:00 ET",
|
||||||
"openclaw": {
|
"routine": "nightly-maintenance",
|
||||||
"before": "2026.2.15",
|
"results": {
|
||||||
"latest": "2026.2.15",
|
"memory": {
|
||||||
"updated": false
|
"working_context": "updated",
|
||||||
},
|
"daily_note": "updated (2026-02-17.md)"
|
||||||
"claude_code": {
|
},
|
||||||
"before": "2.1.44",
|
"os_updates": {
|
||||||
"latest": "2.1.44",
|
"status": "success",
|
||||||
"updated": false
|
"packages_upgraded": 25,
|
||||||
},
|
"kernel_note": "New kernel available — system reboot recommended",
|
||||||
"os": {
|
"kernel_expected": "6.8.0-100-generic"
|
||||||
"available": 23,
|
},
|
||||||
"packages": [
|
"claude_code": {
|
||||||
{
|
"status": "updated",
|
||||||
"name": "cpp-13-x86-64-linux-gnu",
|
"from": "2.1.42",
|
||||||
"from": "13.3.0-6ubuntu2~24.04",
|
"to": "2.1.45",
|
||||||
"to": "13.3.0-6ubuntu2~24.04.1"
|
"note": "Both /usr/bin/claude (system) and ~/.npm-global/bin/claude updated to 2.1.45"
|
||||||
},
|
},
|
||||||
{
|
"openclaw": {
|
||||||
"name": "cpp-13",
|
"status": "up_to_date",
|
||||||
"from": "13.3.0-6ubuntu2~24.04",
|
"version": "2026.2.15"
|
||||||
"to": "13.3.0-6ubuntu2~24.04.1"
|
},
|
||||||
},
|
"inou_mcp_bundle": {
|
||||||
{
|
"status": "check_skipped",
|
||||||
"name": "g++-13-x86-64-linux-gnu",
|
"current_version": "1.6.1",
|
||||||
"from": "13.3.0-6ubuntu2~24.04",
|
"note": "Download URL not available (https://inou.com/download/inou.mcpb returned non-200)"
|
||||||
"to": "13.3.0-6ubuntu2~24.04.1"
|
},
|
||||||
},
|
"session_cleanup": {
|
||||||
{
|
"status": "success",
|
||||||
"name": "g++-13",
|
"orphaned_jsonl_deleted": 212,
|
||||||
"from": "13.3.0-6ubuntu2~24.04",
|
"cron_run_keys_removed": 11,
|
||||||
"to": "13.3.0-6ubuntu2~24.04.1"
|
"sessions_json_remaining_keys": 37
|
||||||
},
|
}
|
||||||
{
|
}
|
||||||
"name": "gcc-13-base",
|
}
|
||||||
"from": "13.3.0-6ubuntu2~24.04",
|
|
||||||
"to": "13.3.0-6ubuntu2~24.04.1"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "gcc-13-x86-64-linux-gnu",
|
|
||||||
"from": "13.3.0-6ubuntu2~24.04",
|
|
||||||
"to": "13.3.0-6ubuntu2~24.04.1"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "gcc-13",
|
|
||||||
"from": "13.3.0-6ubuntu2~24.04",
|
|
||||||
"to": "13.3.0-6ubuntu2~24.04.1"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "gcc-14-base",
|
|
||||||
"from": "14.2.0-4ubuntu2~24.04",
|
|
||||||
"to": "14.2.0-4ubuntu2~24.04.1"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "libasan8",
|
|
||||||
"from": "14.2.0-4ubuntu2~24.04",
|
|
||||||
"to": "14.2.0-4ubuntu2~24.04.1"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "libatomic1",
|
|
||||||
"from": "14.2.0-4ubuntu2~24.04",
|
|
||||||
"to": "14.2.0-4ubuntu2~24.04.1"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "libcc1-0",
|
|
||||||
"from": "14.2.0-4ubuntu2~24.04",
|
|
||||||
"to": "14.2.0-4ubuntu2~24.04.1"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "libgcc-13-dev",
|
|
||||||
"from": "13.3.0-6ubuntu2~24.04",
|
|
||||||
"to": "13.3.0-6ubuntu2~24.04.1"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "libgcc-s1",
|
|
||||||
"from": "14.2.0-4ubuntu2~24.04",
|
|
||||||
"to": "14.2.0-4ubuntu2~24.04.1"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "libgfortran5",
|
|
||||||
"from": "14.2.0-4ubuntu2~24.04",
|
|
||||||
"to": "14.2.0-4ubuntu2~24.04.1"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "libgomp1",
|
|
||||||
"from": "14.2.0-4ubuntu2~24.04",
|
|
||||||
"to": "14.2.0-4ubuntu2~24.04.1"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "libhwasan0",
|
|
||||||
"from": "14.2.0-4ubuntu2~24.04",
|
|
||||||
"to": "14.2.0-4ubuntu2~24.04.1"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "libitm1",
|
|
||||||
"from": "14.2.0-4ubuntu2~24.04",
|
|
||||||
"to": "14.2.0-4ubuntu2~24.04.1"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "liblsan0",
|
|
||||||
"from": "14.2.0-4ubuntu2~24.04",
|
|
||||||
"to": "14.2.0-4ubuntu2~24.04.1"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "libquadmath0",
|
|
||||||
"from": "14.2.0-4ubuntu2~24.04",
|
|
||||||
"to": "14.2.0-4ubuntu2~24.04.1"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "libstdc++-13-dev",
|
|
||||||
"from": "13.3.0-6ubuntu2~24.04",
|
|
||||||
"to": "13.3.0-6ubuntu2~24.04.1"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "libstdc++6",
|
|
||||||
"from": "14.2.0-4ubuntu2~24.04",
|
|
||||||
"to": "14.2.0-4ubuntu2~24.04.1"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "libtsan2",
|
|
||||||
"from": "14.2.0-4ubuntu2~24.04",
|
|
||||||
"to": "14.2.0-4ubuntu2~24.04.1"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "libubsan1",
|
|
||||||
"from": "14.2.0-4ubuntu2~24.04",
|
|
||||||
"to": "14.2.0-4ubuntu2~24.04.1"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"updated": true,
|
|
||||||
"reboot_required": true
|
|
||||||
},
|
|
||||||
"gateway_restarted": false
|
|
||||||
}
|
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
# Working Context (updated 2026-02-17 3:56 PM ET)
|
# Working Context (updated 2026-02-17 9:00 PM ET)
|
||||||
|
|
||||||
## Active Projects
|
## Active Projects
|
||||||
|
|
||||||
|
|
@ -19,32 +19,95 @@
|
||||||
- Vision: contextual voice announcements (deliveries, appliances, Russian for in-laws)
|
- Vision: contextual voice announcements (deliveries, appliances, Russian for in-laws)
|
||||||
- Need Tanya approval before expanding beyond office
|
- Need Tanya approval before expanding beyond office
|
||||||
- **Next feature:** Voice meeting announcements — pre-generate TTS at calendar fetch, play at T-5m
|
- **Next feature:** Voice meeting announcements — pre-generate TTS at calendar fetch, play at T-5m
|
||||||
- **Architecture decision:** Alert dashboard will merge INTO Message Center (not now, but planned). MC becomes the single brain: calendar → TTS → alerts → visual display. For now, build voice into alert-dashboard server.
|
- **Architecture decision:** Alert dashboard will merge INTO Message Center (not now, but planned)
|
||||||
|
|
||||||
|
### Message Center (MC) — M365 Pipeline
|
||||||
|
- **Working:** MC detects emails/Teams → K2.5 summarizes → POSTs to Fully dashboard
|
||||||
|
- **K2.5 completions API** is the correct endpoint (chat API broken for formatting)
|
||||||
|
- **Conversation-aware**: Johan's replies auto-clear Fully alerts for that Teams conversation
|
||||||
|
- **Group coloring**: Teams messages from same conv share background color (hue from conv ID hash)
|
||||||
|
- **Fireworks API key:** `fw_RVcDe4c6mN4utKLsgA7hTm` (working key — old stale key caused issues)
|
||||||
|
- **OC m365 hook removed**: MC handles everything directly now
|
||||||
|
- Previously reported M365 fetch error (ErrorInvalidUrlQueryFilter ~2/16) — appears resolved
|
||||||
|
|
||||||
|
### Fully Dashboard (Forge / Tablets)
|
||||||
|
- **Budget pace indicator:** `usage% / time%` (week = Sat 2pm → Sat 2pm ET)
|
||||||
|
- >100% = red, 90-100% = white, <90% = green. Shows as `⚡ 108%`
|
||||||
|
- **News streams:** Topics (markets, ai, nabl, news, netherlands) each get own bar below alerts
|
||||||
|
- Fetches from James dashboard /api/news, grouped by `topic` field
|
||||||
|
- **Claude usage fixed**: pulls object not array from dashboard API
|
||||||
|
|
||||||
|
### News System
|
||||||
|
- **Cron:** Grok 4.1 Fast every 4h (2,6,10,14,18,22 ET) for news scan
|
||||||
|
- **Philosophy:** X/Twitter as radar → then PRIMARY SOURCE. No middlemen.
|
||||||
|
- `topic` field added to James dashboard news API; Topics section groups by topic with emoji headers
|
||||||
|
|
||||||
|
### Password Manager Migration: Proton Pass → Vaultwarden
|
||||||
|
- **Status:** Plan defined, awaiting Johan action
|
||||||
|
- **Steps:**
|
||||||
|
1. Johan creates account at https://vault.inou.com
|
||||||
|
2. Export from Proton Pass (Settings → Export → CSV or JSON)
|
||||||
|
3. Import into Vaultwarden (Bitwarden-compatible format)
|
||||||
|
4. Set SIGNUPS_ALLOWED=false after account created
|
||||||
|
5. Install Bitwarden app on iPhone, Mac, browser extension → point to https://vault.inou.com
|
||||||
|
- **TODO:** rclone backup to GDrive (needs browser OAuth on Zurich)
|
||||||
|
- Fish Audio API key + other secrets → Vaultwarden
|
||||||
|
|
||||||
|
### Domain Registrar Migration
|
||||||
|
- **URGENT: jongsma.me expires 2026-02-28** — transfer to Cloudflare before auto-renewal
|
||||||
|
- stpetersburgaquatics.com expires 2026-03-13
|
||||||
|
- OpenProvider has ~€80 credits; migrate renewals to Cloudflare going forward
|
||||||
|
- Full renewal schedule: zavtra(May), inou(Jun), unbelievable(Jul), x4(Aug), e-consultants(Sep), muskepo.nl(Sep), busel(Oct), localbackup(Oct), johanjongsma(Nov), 851brightwaters(Dec), flourishevents(Jan27), muskepo.com(Jan27), harryhaasjes(Feb27)
|
||||||
|
|
||||||
|
### DNS Migration
|
||||||
|
- Changed NS to Cloudflare for ALL 10 remaining domains via OpenProvider API
|
||||||
|
- Cloudflare activation checks triggered; cron job at 4:55 AM to check + add remaining 6
|
||||||
|
- OpenProvider API creds: `~/.config/openprovider.env`
|
||||||
|
|
||||||
|
### Email Migration (Stalwart)
|
||||||
|
- Tanya added to Proton Bridge (account #2, pw: dxk6YlYpRgr3Z7fw3BhXDQ)
|
||||||
|
- Stalwart: all users recreated with `"roles":["user"]` — needed for IMAP auth
|
||||||
|
- imapsync working via SSH tunnel (localhost:9930 → zurich:993)
|
||||||
|
- Tanya sync was running in background (/tmp/imapsync_tanya.log) — check status
|
||||||
|
- Self-signed cert added but Stalwart ACME override issue still pending
|
||||||
|
|
||||||
### James Dashboard
|
### James Dashboard
|
||||||
- Running on port 9200 as dealroom.service
|
- Running on port 9200 as dealroom.service
|
||||||
- Agent chat still needs testing from Johan's Mac browser
|
- Agent chat still needs testing from Johan's Mac browser
|
||||||
|
|
||||||
### Message Center — Broken
|
### BlueBubbles / iMessage
|
||||||
- M365 email fetch: ErrorInvalidUrlQueryFilter (broken since ~2/16 14:58)
|
- Mac Mini M4 (2025) is Johan's daily driver — can run BlueBubbles now
|
||||||
- LLM triage API key invalid (401)
|
- Setup deferred
|
||||||
- Both need config fixes
|
|
||||||
|
|
||||||
## Key Context
|
## Key Context
|
||||||
- **House showing prep** worked well today — HA bulk ops via K2.5 subagent (new rule in AGENTS.md)
|
|
||||||
- **4000K** = correct color temp for the house (not 6500K)
|
|
||||||
- **Wake permission:** 8 AM+ ET, genuinely important events only
|
- **Wake permission:** 8 AM+ ET, genuinely important events only
|
||||||
- **Briefings:** Johan prefers X/Twitter as primary news source
|
- **Briefings:** Johan prefers X/Twitter as primary news source
|
||||||
- **OpenClaw patches:** Two source patches need reapplication after updates (scope preservation + deleted transcript indexing) — see 2026-02-16.md
|
- **OpenClaw patches:** Two source patches need reapplication after updates:
|
||||||
|
- Scope preservation patch
|
||||||
|
- Deleted transcript indexing patch
|
||||||
|
- See 2026-02-16.md for details
|
||||||
|
- **Fireworks for private data** (emails, Teams); Grok OK for public news
|
||||||
|
- **Claude Sonnet 4.6 released** — 1M context (beta), adaptive thinking, context compaction (beta), $3/$15 per M tokens
|
||||||
|
|
||||||
## Upcoming
|
## Upcoming / Open Threads
|
||||||
- **Sophia blood draw** — Tue Feb 17 at 12:00 PM, Health Link, 851 Brightwater Blvd NE, $65 (Karina)
|
- **jongsma.me domain transfer** (URGENT — expires 2026-02-28)
|
||||||
- Fix Message Center M365 + LLM triage
|
- **Vaultwarden setup** — Johan needs to create account + import passwords
|
||||||
- Build persistent TTS service
|
- **rclone backup** for Vaultwarden (needs browser OAuth on Zurich)
|
||||||
- Test xAI/Grok in morning briefing
|
- **Persistent TTS service** on forge
|
||||||
|
- **BlueBubbles setup** on Mac Mini M4
|
||||||
|
- **Test xAI/Grok** in morning briefing
|
||||||
|
- **Sophia blood draw** — was today (Tue Feb 17 12:00 PM), Health Link
|
||||||
|
- Fix Stalwart ACME cert issue
|
||||||
- Matrix OS — watch only, revisit when mature
|
- Matrix OS — watch only, revisit when mature
|
||||||
|
|
||||||
## People
|
## People
|
||||||
- **Misha (Michael Muskepo)** — Johan's son, Dealspace AI co-creator with PE guys
|
- **Misha (Michael Muskepo)** — Johan's son, Dealspace AI co-creator with PE guys
|
||||||
- **Tanya (Tatyana)** — Johan's wife, gatekeeper for smart home expansion
|
- **Tanya (Tatyana)** — Johan's wife, gatekeeper for smart home expansion
|
||||||
- **Sophia** — daughter, blood draw tomorrow
|
- **Sophia** — daughter (blood draw was today)
|
||||||
|
- **Karina** — associated with Sophia's health link appointment ($65)
|
||||||
|
|
||||||
|
## Corrections Learned Today
|
||||||
|
- **"Best over fast, always"** — Johan doesn't want fastest approach, wants best
|
||||||
|
- **Don't bypass root cause** — removing LLM summarization was lazy; fix the prompt instead
|
||||||
|
- **Test with curl before deploying** — always get "curl proof" before pushing code changes
|
||||||
|
- **K2.5 chat API broken for formatting** — use completions API with few-shot pattern instead
|
||||||
|
|
|
||||||
Binary file not shown.
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,15 @@
|
||||||
|
BEGIN:VCARD
|
||||||
|
VERSION:3.0
|
||||||
|
FN:Jeff Brewster
|
||||||
|
N:Brewster;Jeff;;;
|
||||||
|
TEL;TYPE=CELL:+1917-558-6320
|
||||||
|
NOTE:DevOps
|
||||||
|
CATEGORIES:myContacts
|
||||||
|
END:VCARD
|
||||||
|
BEGIN:VCARD
|
||||||
|
VERSION:3.0
|
||||||
|
FN:Omegaji
|
||||||
|
N:;Omegaji;;;
|
||||||
|
TEL;TYPE=CELL:+918849428284
|
||||||
|
CATEGORIES:myContacts
|
||||||
|
END:VCARD
|
||||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,248 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
vCard deduplicator — conservative, correct.
|
||||||
|
|
||||||
|
Rules (in order):
|
||||||
|
1. Same exact email address → same person
|
||||||
|
2. Same phone (appears ≤2 times total — personal phones only) → same person
|
||||||
|
3. Same normalized full name (≥2 significant words) → same person
|
||||||
|
|
||||||
|
Post-merge guard:
|
||||||
|
If a merged record would have >4 different email domains → bad chain merge.
|
||||||
|
Revert: keep each original record separately.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import glob, re
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
|
INPUT_DIR = "/home/johan/clawd/tmp/contacts"
|
||||||
|
OUTPUT = "/home/johan/clawd/tmp/contacts/merged.vcf"
|
||||||
|
|
||||||
|
# ── parser ────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def parse_vcf(path):
    """Parse a .vcf file into a list of property dicts (key → list of values).

    Folded continuation lines (newline followed by space/tab, per vCard 3.0)
    are unfolded first; BEGIN/END/VERSION envelope markers are dropped.
    """
    with open(path, encoding='utf-8', errors='replace') as fh:
        text = fh.read()
    # Unfold continuation lines before splitting into individual cards.
    text = re.sub(r'\r?\n[ \t]', '', text)

    cards = []
    for chunk in re.split(r'(?=BEGIN:VCARD)', text, flags=re.I):
        if not chunk.strip().upper().startswith('BEGIN:VCARD'):
            continue
        props = defaultdict(list)
        for raw_line in chunk.splitlines():
            if ':' not in raw_line:
                continue
            key, _, value = raw_line.partition(':')
            key = key.strip().upper()
            if key in ('BEGIN', 'END', 'VERSION'):
                continue
            props[key].append(value.strip())
        cards.append(dict(props))
    return cards
|
||||||
|
|
||||||
|
# ── field helpers ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def get_field(card, prefix):
    """Return the first value whose key matches *prefix*.

    Accepts an optional Apple-style 'ITEMn.' prefix and optional ';'-separated
    parameters after the field name; returns '' when no key matches.
    """
    pattern = re.compile(r'(ITEM\d+\.)?' + prefix + r'($|[;:])', re.I)
    for key, values in card.items():
        if pattern.match(key):
            return values[0] if values else ''
    return ''
|
||||||
|
|
||||||
|
def get_emails(card):
    """Collect every e-mail value on the card, lower-cased, as a set."""
    email_key = re.compile(r'(ITEM\d+\.)?EMAIL', re.I)
    found = set()
    for key, values in card.items():
        if not email_key.match(key):
            continue
        # Only keep values that actually look like addresses.
        found.update(v.strip().lower() for v in values if v and '@' in v)
    return found
|
||||||
|
|
||||||
|
def get_phones(card):
    """Map normalized phone → original string for every TEL property.

    Normalization keeps the last 9 digits so that '+31 6 ...', '06 ...' and
    bare forms of the same number collide on one key.
    """
    tel_key = re.compile(r'(ITEM\d+\.)?TEL', re.I)
    phones = {}
    for key, values in card.items():
        if not tel_key.match(key):
            continue
        for value in values:
            if not value:
                continue
            digits = re.sub(r'\D', '', value)
            norm = digits[-9:]  # shorter numbers pass through unchanged
            if norm:
                phones[norm] = value.strip()
    return phones
|
||||||
|
|
||||||
|
def get_name(card):
    """Display name for a card: FN if present, otherwise fall back to ORG."""
    return get_field(card, 'FN').strip() or get_field(card, 'ORG').strip()
|
||||||
|
|
||||||
|
def normalize_name(name):
    """Lowercase, collapse whitespace, and sort the words.

    Sorting makes the match order-independent: 'Jeff Brewster' and
    'Brewster Jeff' normalize to the same key.
    """
    return ' '.join(sorted(name.lower().split()))
|
||||||
|
|
||||||
|
def completeness(card):
    """Rough richness score: total count of non-empty values on the card."""
    score = 0
    for values in card.values():
        score += sum(1 for v in values if v)
    return score
|
||||||
|
|
||||||
|
def email_domains(emails):
    """Return the set of domains of the given e-mail addresses.

    Uses rsplit so an address whose (quoted) local part contains '@' still
    yields the real domain; values without '@' are skipped.
    """
    return {e.rsplit('@', 1)[1] for e in emails if '@' in e}
|
||||||
|
|
||||||
|
# ── merge ─────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
# Auto-generated category labels that carry no information about the person.
NOISE_CATS = re.compile(
    r'^(imported on .+|restored from google.*|mycontacts)$', re.I)


def merge_cards(cards):
    """Merge a group of duplicate cards into a single record.

    The most complete card is the base; e-mails, phones and CATEGORIES are
    pooled across the whole group. Pooled e-mails are re-emitted as
    ITEMn.EMAIL;TYPE=INTERNET, phones as TEL_PHONE_i placeholder keys
    (rendered as TEL;TYPE=CELL by serialize()).
    """
    base = max(cards, key=completeness)

    pooled_emails = set()
    pooled_phones = {}
    pooled_cats = []
    for card in cards:
        pooled_emails.update(get_emails(card))
        pooled_phones.update(get_phones(card))
        for key, values in card.items():
            if key.upper() != 'CATEGORIES':
                continue
            for value in values:
                pooled_cats.extend(part.strip() for part in value.split(','))
    # De-dupe categories (order-preserving) and drop auto-generated noise.
    pooled_cats = list(dict.fromkeys(
        cat for cat in pooled_cats if not NOISE_CATS.match(cat)))

    merged = {}
    pooled_prefixes = ('EMAIL', 'TEL', 'CATEGORIES')
    for key, values in base.items():
        # Skip fields that are replaced below by the pooled versions.
        if any(re.match(r'(ITEM\d+\.)?' + p, key, re.I) for p in pooled_prefixes):
            continue
        merged[key] = list(values)

    for idx, email in enumerate(sorted(pooled_emails), start=1):
        merged[f'ITEM{idx}.EMAIL;TYPE=INTERNET'] = [email]
    for idx, original in enumerate(pooled_phones.values()):
        merged[f'TEL_PHONE_{idx}'] = [original]
    if pooled_cats:
        merged['CATEGORIES'] = [','.join(pooled_cats)]

    return merged
|
||||||
|
|
||||||
|
def serialize(card):
    """Render a parsed card dict back to vCard 3.0 text.

    Keys are emitted in a fixed field-priority order (first matching prefix
    wins; unknown keys sort last); synthetic TEL_PHONE_n keys produced by
    merge_cards() are displayed as TEL;TYPE=CELL.
    """
    priority = ['FN', 'N', 'ORG', 'TITLE', 'EMAIL', 'TEL', 'ADR', 'URL',
                'NOTE', 'BDAY', 'PHOTO', 'CATEGORIES', 'X-']

    def bucket(key):
        upper = key.upper()
        for rank, prefix in enumerate(priority):
            if upper.startswith(prefix):
                return rank
        return 99

    out = ['BEGIN:VCARD', 'VERSION:3.0']
    for key in sorted(card.keys(), key=bucket):
        display = 'TEL;TYPE=CELL' if re.fullmatch(r'TEL_PHONE_\d+', key) else key
        out.extend(f'{display}:{value}' for value in card[key] if value)
    out.append('END:VCARD')
    return '\n'.join(out)
|
||||||
|
|
||||||
|
# ── union-find ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def make_uf(n):
    """Union-find over elements 0..n-1; returns (find, union) closures.

    find() uses path halving, so repeated lookups flatten the forest.
    """
    parent = list(range(n))

    def find(x):
        while parent[x] != x:
            parent[x] = parent[parent[x]]  # path halving
            x = parent[x]
        return x

    def union(a, b):
        root_a, root_b = find(a), find(b)
        if root_a != root_b:
            parent[root_b] = root_a

    return find, union
|
||||||
|
|
||||||
|
# ── main ──────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def main():
    """Load every input .vcf, cluster duplicates, merge, and write merged.vcf."""
    paths = sorted(glob.glob(f'{INPUT_DIR}/*.vcf'))
    # Never re-ingest our own output files.
    skip_words = ('merged', 'final', 'dedup', 'postprocess')
    paths = [p for p in paths if not any(w in p for w in skip_words)]

    all_cards = []
    for path in paths:
        cards = parse_vcf(path)
        print(f" {path.split('/')[-1]}: {len(cards)}")
        all_cards.extend(cards)
    n = len(all_cards)
    print(f"Total: {n}")

    find, union = make_uf(n)

    # Rule 1 — identical e-mail address links two cards.
    email_owner = {}
    for idx, card in enumerate(all_cards):
        for email in get_emails(card):
            first = email_owner.setdefault(email, idx)
            if first != idx:
                union(idx, first)

    # Rule 2 — identical phone, but only "personal" phones (seen on <3 cards).
    phone_freq = defaultdict(int)
    for card in all_cards:
        for norm in get_phones(card):
            phone_freq[norm] += 1

    phone_owner = {}
    for idx, card in enumerate(all_cards):
        for norm in get_phones(card):
            if phone_freq[norm] >= 3:
                continue  # shared/switchboard number — not identifying
            first = phone_owner.setdefault(norm, idx)
            if first != idx:
                union(idx, first)

    # Rule 3 — identical normalized full name with ≥2 significant words.
    name_owner = {}
    for idx, card in enumerate(all_cards):
        name = get_name(card)
        significant = [w for w in name.split() if len(w) > 2]
        if len(significant) < 2:
            continue  # single/short word — too ambiguous to match on
        key = normalize_name(name)
        first = name_owner.setdefault(key, idx)
        if first != idx:
            union(idx, first)

    # Cluster cards by union-find root.
    groups = defaultdict(list)
    for idx, card in enumerate(all_cards):
        groups[find(idx)].append(card)

    # Merge each cluster; more than MAX_DOMAINS distinct e-mail domains means
    # a chain merge went wrong, so that cluster reverts to individual records.
    MAX_DOMAINS = 4
    result = []
    reverted = 0
    dup_removed = 0
    for members in groups.values():
        if len(members) == 1:
            result.append(members[0])
            continue
        merged = merge_cards(members)
        if len(email_domains(get_emails(merged))) > MAX_DOMAINS:
            reverted += len(members) - 1
            result.extend(members)
        else:
            dup_removed += len(members) - 1
            result.append(merged)

    print(f"Duplicates removed: {dup_removed}")
    print(f"Reverted (chain merges): {reverted} groups → individual records")
    print(f"Output: {len(result)}")

    with open(OUTPUT, 'w', encoding='utf-8') as out:
        out.write('\n\n'.join(serialize(card) for card in result) + '\n')
    print(f"Written: {OUTPUT}")


if __name__ == '__main__':
    main()
|
||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,155 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Post-process merged.vcf:
|
||||||
|
1. Remove contacts with no email AND no phone
|
||||||
|
2. Remove contacts with no name (FN or ORG) — list their emails for review
|
||||||
|
3. Convert Dutch phone numbers (0X...) to international format (+31...)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import re, sys
|
||||||
|
|
||||||
|
INPUT = "/home/johan/clawd/tmp/contacts/merged.vcf"
|
||||||
|
OUTPUT = "/home/johan/clawd/tmp/contacts/final.vcf"
|
||||||
|
|
||||||
|
# ── vCard parser (simple block-based) ─────────────────────────────────────────
|
||||||
|
|
||||||
|
def parse_blocks(path):
    """Read a .vcf file and return each raw BEGIN:VCARD...END:VCARD block.

    Folded continuation lines are unfolded; anything before the first
    BEGIN:VCARD is discarded.
    """
    with open(path, encoding='utf-8', errors='replace') as fh:
        text = re.sub(r'\r?\n[ \t]', '', fh.read())
    chunks = (c.strip() for c in re.split(r'(?=BEGIN:VCARD)', text, flags=re.I))
    return [c for c in chunks if c.upper().startswith('BEGIN:VCARD')]
|
||||||
|
|
||||||
|
def block_lines(block):
    """Content lines of a raw block, minus envelope markers and blank lines."""
    envelope = ('BEGIN:VCARD', 'END:VCARD', 'VERSION:3.0')
    return [ln for ln in block.splitlines()
            if ln.strip() and ln.upper() not in envelope]
|
||||||
|
|
||||||
|
def get_field_val(block, prefix):
    """First value whose property name matches *prefix* (ITEMn. allowed).

    Returns '' when the block carries no matching property.
    """
    pattern = re.compile(r'(ITEM\d+\.)?' + prefix, re.I)
    for line in block.splitlines():
        key, _, value = line.partition(':')
        if pattern.match(key.strip()):
            return value.strip()
    return ''
|
||||||
|
|
||||||
|
def has_email(block):
    """True if the raw block carries at least one EMAIL property."""
    return re.search(r'^(ITEM\d+\.)?EMAIL\b', block, re.I | re.M) is not None
|
||||||
|
|
||||||
|
def has_phone(block):
    """True if the raw block carries at least one TEL property."""
    return re.search(r'^(ITEM\d+\.)?TEL\b', block, re.I | re.M) is not None
|
||||||
|
|
||||||
|
def get_name(block):
    """Display name for a raw block: FN, falling back to ORG ('' if neither)."""
    return get_field_val(block, 'FN') or get_field_val(block, 'ORG')
|
||||||
|
|
||||||
|
def get_emails(block):
    """All e-mail values in the block, lower-cased, in file order."""
    email_key = re.compile(r'(ITEM\d+\.)?EMAIL', re.I)
    found = []
    for line in block.splitlines():
        key, _, value = line.partition(':')
        if value.strip() and email_key.match(key.strip()):
            found.append(value.strip().lower())
    return found
|
||||||
|
|
||||||
|
# ── Dutch phone normaliser ─────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def nl_to_intl(phone):
|
||||||
|
"""
|
||||||
|
Convert Dutch local format to E.164-ish international format.
|
||||||
|
Rules:
|
||||||
|
- Already international (+XX or 00XX) → leave alone
|
||||||
|
- US numbers (10 digit, starts with 1 after stripping) → leave alone
|
||||||
|
- 10-digit Dutch numbers starting with 0 → replace leading 0 with +31
|
||||||
|
Handles spaces/dashes/dots in input, preserves spacing style.
|
||||||
|
"""
|
||||||
|
p = phone.strip()
|
||||||
|
|
||||||
|
# Already international
|
||||||
|
if p.startswith('+') or p.startswith('00'):
|
||||||
|
return p
|
||||||
|
|
||||||
|
# Strip formatting to count digits
|
||||||
|
digits_only = re.sub(r'\D', '', p)
|
||||||
|
|
||||||
|
# Must start with 0 and be 10 digits to be Dutch local
|
||||||
|
if not digits_only.startswith('0') or len(digits_only) != 10:
|
||||||
|
return p
|
||||||
|
|
||||||
|
# Replace leading 0 with +31, preserve the rest of the formatting
|
||||||
|
# e.g. "06 23 123 456" → "+31 6 23 123 456"
|
||||||
|
# e.g. "0646438755" → "+31646438755"
|
||||||
|
# e.g. "020-1234567" → "+3120-1234567" (Amsterdam landline)
|
||||||
|
converted = '+31' + p[1:] # drop leading '0', prepend +31
|
||||||
|
return converted
|
||||||
|
|
||||||
|
def convert_phones_in_block(block):
    """Rewrite every TEL value through nl_to_intl(); other lines pass through.

    Lines are only rewritten when the conversion actually changed the value,
    so unrelated formatting is left untouched.
    """
    tel_key = re.compile(r'(ITEM\d+\.)?TEL', re.I)
    out = []
    for line in block.splitlines():
        key, _, raw_value = line.partition(':')
        value = raw_value.strip()
        if value and tel_key.match(key.strip()):
            converted = nl_to_intl(value)
            if converted != value:
                line = f'{key}:{converted}'
        out.append(line)
    return '\n'.join(out)
|
||||||
|
|
||||||
|
# ── main ──────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def main():
    """Filter merged.vcf, normalise Dutch phones, and write final.vcf.

    Drops contacts with no e-mail AND no phone, drops nameless contacts
    (listing their e-mails for manual review), and converts Dutch local
    phone numbers to +31 format.
    """
    blocks = parse_blocks(INPUT)
    print(f"Input: {len(blocks)} contacts")

    kept = []
    removed_no_contact = 0
    removed_no_name_emails = []  # e-mails of nameless contacts, for review
    phones_converted = 0

    for block in blocks:
        # Step 1: a contact must be reachable — keep only if email or phone.
        if not (has_email(block) or has_phone(block)):
            removed_no_contact += 1
            continue

        # Step 2: a contact must have a name (FN or ORG).
        if not get_name(block).strip():
            emails = get_emails(block)
            removed_no_name_emails.extend(emails or ['(no email — phone only)'])
            continue

        # Step 3: normalise Dutch phone numbers to +31 form.
        converted = convert_phones_in_block(block)
        if converted != block:
            phones_converted += 1
            block = converted

        kept.append(block)

    print(f"Removed (no email+phone): {removed_no_contact}")
    print(f"Removed (no name): {len(removed_no_name_emails)} email(s) from nameless contacts")
    print(f"Phone numbers converted: {phones_converted}")
    print(f"Output: {len(kept)} contacts")

    with open(OUTPUT, 'w', encoding='utf-8') as out:
        for block in kept:
            # Re-wrap defensively in case a block lost its envelope upstream.
            if not block.startswith('BEGIN:VCARD'):
                block = 'BEGIN:VCARD\nVERSION:3.0\n' + block
            if not block.endswith('END:VCARD'):
                block = block + '\nEND:VCARD'
            out.write(block + '\n\n')

    print(f"\nWritten to: {OUTPUT}")

    if removed_no_name_emails:
        print(f"\n── Nameless contacts (emails for review) ──────────────────")
        for email in sorted(set(removed_no_name_emails)):
            print(f" {email}")


if __name__ == '__main__':
    main()
|
||||||
Loading…
Reference in New Issue