From 1e51f0d518b98fe24ad07d1f1edd94f29d4b3ed4 Mon Sep 17 00:00:00 2001 From: Johan Jongsma Date: Sun, 1 Feb 2026 17:44:18 +0000 Subject: [PATCH] Switch from Claude to K2.5 via Fireworks (cheaper) - Use Fireworks API (OpenAI-compatible) instead of Anthropic - Model: accounts/fireworks/models/k2-5-kimi-vision - ~10% cost of Claude for document extraction - Update .env to use FIREWORKS_API_KEY --- README.md | 16 ++++++------ processor.py | 73 ++++++++++++++++++++++++++-------------------------- 2 files changed, 45 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index e22d184..005e45f 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ # Document Processor -AI-powered document management system using Claude vision for extraction and SQLite for storage/search. +AI-powered document management system using K2.5 (via Fireworks) for extraction and SQLite for storage/search. ## Features -- **AI Vision Analysis**: Uses Claude to read documents, extract text, classify, and summarize +- **AI Vision Analysis**: Uses K2.5 (Kimi via Fireworks) to read documents, extract text, classify, and summarize - **No OCR dependencies**: Just drop files in inbox, AI handles the rest - **SQLite Storage**: Full-text search via SQLite, embeddings ready (placeholder) - **Auto-categorization**: Taxes, bills, medical, insurance, legal, financial, etc. @@ -20,14 +20,14 @@ python3 -m venv venv source venv/bin/activate # Install dependencies -pip install anthropic +pip install openai # Configure API key (one of these methods): # Option 1: Environment variable -export ANTHROPIC_API_KEY=sk-ant-... +export FIREWORKS_API_KEY=... # Option 2: .env file -echo 'ANTHROPIC_API_KEY=sk-ant-...' > .env +echo 'FIREWORKS_API_KEY=...' > .env ``` ## Usage @@ -99,14 +99,14 @@ journalctl --user -u doc-processor -f ## Requirements - Python 3.10+ -- `anthropic` Python package +- `openai` Python package (for Fireworks API) - `pdftoppm` (poppler-utils) for PDF conversion -- Anthropic API key +- Fireworks API key ## API Key The processor looks for the API key in this order: -1. `ANTHROPIC_API_KEY` environment variable +1. `FIREWORKS_API_KEY` environment variable 2. `~/dev/doc-processor/.env` file ## Embeddings diff --git a/processor.py b/processor.py index 8ae1cba..7bd2ec5 100755 --- a/processor.py +++ b/processor.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """ Document Processor for ~/documents/inbox/ -Uses AI vision (Claude) for document analysis. Stores embeddings in SQLite. +Uses AI vision (K2.5 via Fireworks) for document analysis. Stores embeddings in SQLite. """ import os @@ -19,12 +19,12 @@ from typing import Optional, Dict, Any, List import time import argparse -# Try to import anthropic, fail gracefully with helpful message +# Try to import openai (used for Fireworks API), fail gracefully try: - import anthropic + from openai import OpenAI except ImportError: - print("ERROR: anthropic package not installed") - print("Run: cd ~/dev/doc-processor && source venv/bin/activate && pip install anthropic") + print("ERROR: openai package not installed") + print("Run: cd ~/dev/doc-processor && source venv/bin/activate && pip install openai") sys.exit(1) # Paths @@ -50,26 +50,29 @@ for cat in CATEGORIES: (RECORDS / cat).mkdir(parents=True, exist_ok=True) -def get_anthropic_client() -> anthropic.Anthropic: - """Get Anthropic client, checking for API key.""" - api_key = os.environ.get("ANTHROPIC_API_KEY") +def get_fireworks_client() -> OpenAI: + """Get Fireworks client (OpenAI-compatible), checking for API key.""" + api_key = os.environ.get("FIREWORKS_API_KEY") if not api_key: # Try reading from config file config_path = Path.home() / "dev/doc-processor/.env" if config_path.exists(): for line in config_path.read_text().splitlines(): - if line.startswith("ANTHROPIC_API_KEY="): + if line.startswith("FIREWORKS_API_KEY="): api_key = line.split("=", 1)[1].strip().strip('"\'') break if not api_key: raise RuntimeError( - "ANTHROPIC_API_KEY not set. Either:\n" - " 1. Set ANTHROPIC_API_KEY environment variable\n" - " 2. Create ~/dev/doc-processor/.env with ANTHROPIC_API_KEY=sk-ant-..." + "FIREWORKS_API_KEY not set. Either:\n" + " 1. Set FIREWORKS_API_KEY environment variable\n" + " 2. Create ~/dev/doc-processor/.env with FIREWORKS_API_KEY=..." ) - return anthropic.Anthropic(api_key=api_key) + return OpenAI( + api_key=api_key, + base_url="https://api.fireworks.ai/inference/v1" + ) def init_embeddings_db(): @@ -140,12 +143,12 @@ def encode_image_base64(filepath: Path) -> tuple[str, str]: return base64.standard_b64encode(f.read()).decode('utf-8'), media_type -def analyze_document_with_ai(filepath: Path, client: anthropic.Anthropic) -> Dict[str, Any]: +def analyze_document_with_ai(filepath: Path, client: OpenAI) -> Dict[str, Any]: """ - Use Claude vision to analyze document. + Use K2.5 via Fireworks to analyze document. Returns: {category, doc_type, date, vendor, amount, summary, full_text} """ - print(f" Analyzing with AI...") + print(f" Analyzing with K2.5...") try: image_data, media_type = encode_image_base64(filepath) @@ -185,7 +188,7 @@ def analyze_document_with_ai(filepath: Path, client: anthropic.Anthropic) -> Dic 5. **Summary**: 1-2 sentence description of what this document is. -Respond in JSON format: +Respond in JSON format ONLY (no markdown, no explanation): { "category": "...", "doc_type": "...", @@ -197,20 +200,19 @@ Respond in JSON format: }""" try: - response = client.messages.create( - model="claude-sonnet-4-20250514", - max_tokens=4096, + # K2.5 via Fireworks using OpenAI-compatible API + response = client.chat.completions.create( + model="accounts/fireworks/models/k2-5-kimi-vision", + max_tokens=8192, messages=[ { "role": "user", "content": [ { - "type": "image", - "source": { - "type": "base64", - "media_type": media_type, - "data": image_data, - }, + "type": "image_url", + "image_url": { + "url": f"data:{media_type};base64,{image_data}" + } }, { "type": "text", @@ -222,7 +224,7 @@ Respond in JSON format: ) # Parse JSON from response - text = response.content[0].text + text = response.choices[0].message.content # Try to extract JSON from response (handle markdown code blocks) if "```json" in text: @@ -257,18 +259,17 @@ Respond in JSON format: } -def generate_embedding(text: str, client: anthropic.Anthropic) -> Optional[List[float]]: +def generate_embedding(text: str, client: OpenAI) -> Optional[List[float]]: """ - Generate text embedding using Anthropic's embedding endpoint. - Note: As of 2024, Anthropic doesn't have a public embedding API. - This is a placeholder - implement with OpenAI, Voyage, or local model. + Generate text embedding. For now, returns None and we'll use full-text search in SQLite. + Can implement with OpenAI, Voyage, or local model later. """ # TODO: Implement with preferred embedding provider # Options: # 1. OpenAI text-embedding-3-small (cheap, good quality) - # 2. Voyage AI (good for documents) + # 2. Voyage AI (good for documents) # 3. Local sentence-transformers return None @@ -438,7 +439,7 @@ def export_expense(hash_id: str, classification: Dict, filepath: Path) -> None: ]) -def process_document(filepath: Path, client: anthropic.Anthropic) -> bool: +def process_document(filepath: Path, client: OpenAI) -> bool: """Process a single document through the full pipeline.""" print(f"Processing: {filepath.name}") @@ -501,7 +502,7 @@ def process_document(filepath: Path, client: anthropic.Anthropic) -> bool: return True -def process_inbox(client: anthropic.Anthropic) -> int: +def process_inbox(client: OpenAI) -> int: """Process all documents in inbox. Returns count processed.""" count = 0 for filepath in sorted(INBOX.iterdir()): @@ -516,7 +517,7 @@ def process_inbox(client: anthropic.Anthropic) -> int: return count -def watch_inbox(client: anthropic.Anthropic, interval: int = 60) -> None: +def watch_inbox(client: OpenAI, interval: int = 60) -> None: """Watch inbox continuously.""" print(f"Watching {INBOX} (interval: {interval}s)") print("Press Ctrl+C to stop") @@ -539,7 +540,7 @@ def main(): init_embeddings_db() try: - client = get_anthropic_client() + client = get_fireworks_client() except RuntimeError as e: print(f"ERROR: {e}") sys.exit(1)