Switch from Claude to K2.5 via Fireworks (cheaper)

- Use Fireworks API (OpenAI-compatible) instead of Anthropic
- Model: accounts/fireworks/models/k2-5-kimi-vision
- ~10% of Claude's cost for document extraction
- Update .env to use FIREWORKS_API_KEY
This commit is contained in:
Johan Jongsma 2026-02-01 17:44:18 +00:00
parent fb3d5a46b5
commit 1e51f0d518
2 changed files with 45 additions and 44 deletions

View File

@ -1,10 +1,10 @@
# Document Processor
AI-powered document management system using Claude vision for extraction and SQLite for storage/search.
AI-powered document management system using K2.5 (via Fireworks) for extraction and SQLite for storage/search.
## Features
- **AI Vision Analysis**: Uses Claude to read documents, extract text, classify, and summarize
- **AI Vision Analysis**: Uses K2.5 (Kimi via Fireworks) to read documents, extract text, classify, and summarize
- **No OCR dependencies**: Just drop files in inbox, AI handles the rest
- **SQLite Storage**: Full-text search via SQLite, embeddings ready (placeholder)
- **Auto-categorization**: Taxes, bills, medical, insurance, legal, financial, etc.
@ -20,14 +20,14 @@ python3 -m venv venv
source venv/bin/activate
# Install dependencies
pip install anthropic
pip install openai
# Configure API key (one of these methods):
# Option 1: Environment variable
export ANTHROPIC_API_KEY=sk-ant-...
export FIREWORKS_API_KEY=...
# Option 2: .env file
echo 'ANTHROPIC_API_KEY=sk-ant-...' > .env
echo 'FIREWORKS_API_KEY=...' > .env
```
## Usage
@ -99,14 +99,14 @@ journalctl --user -u doc-processor -f
## Requirements
- Python 3.10+
- `anthropic` Python package
- `openai` Python package (for Fireworks API)
- `pdftoppm` (poppler-utils) for PDF conversion
- Anthropic API key
- Fireworks API key
## API Key
The processor looks for the API key in this order:
1. `ANTHROPIC_API_KEY` environment variable
1. `FIREWORKS_API_KEY` environment variable
2. `~/dev/doc-processor/.env` file
## Embeddings

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python3
"""
Document Processor for ~/documents/inbox/
Uses AI vision (Claude) for document analysis. Stores embeddings in SQLite.
Uses AI vision (K2.5 via Fireworks) for document analysis. Stores embeddings in SQLite.
"""
import os
@ -19,12 +19,12 @@ from typing import Optional, Dict, Any, List
import time
import argparse
# Try to import anthropic, fail gracefully with helpful message
# Try to import openai (used for Fireworks API), fail gracefully
try:
import anthropic
from openai import OpenAI
except ImportError:
print("ERROR: anthropic package not installed")
print("Run: cd ~/dev/doc-processor && source venv/bin/activate && pip install anthropic")
print("ERROR: openai package not installed")
print("Run: cd ~/dev/doc-processor && source venv/bin/activate && pip install openai")
sys.exit(1)
# Paths
@ -50,26 +50,29 @@ for cat in CATEGORIES:
(RECORDS / cat).mkdir(parents=True, exist_ok=True)
def get_anthropic_client() -> anthropic.Anthropic:
"""Get Anthropic client, checking for API key."""
api_key = os.environ.get("ANTHROPIC_API_KEY")
def get_fireworks_client() -> OpenAI:
"""Get Fireworks client (OpenAI-compatible), checking for API key."""
api_key = os.environ.get("FIREWORKS_API_KEY")
if not api_key:
# Try reading from config file
config_path = Path.home() / "dev/doc-processor/.env"
if config_path.exists():
for line in config_path.read_text().splitlines():
if line.startswith("ANTHROPIC_API_KEY="):
if line.startswith("FIREWORKS_API_KEY="):
api_key = line.split("=", 1)[1].strip().strip('"\'')
break
if not api_key:
raise RuntimeError(
"ANTHROPIC_API_KEY not set. Either:\n"
" 1. Set ANTHROPIC_API_KEY environment variable\n"
" 2. Create ~/dev/doc-processor/.env with ANTHROPIC_API_KEY=sk-ant-..."
"FIREWORKS_API_KEY not set. Either:\n"
" 1. Set FIREWORKS_API_KEY environment variable\n"
" 2. Create ~/dev/doc-processor/.env with FIREWORKS_API_KEY=..."
)
return anthropic.Anthropic(api_key=api_key)
return OpenAI(
api_key=api_key,
base_url="https://api.fireworks.ai/inference/v1"
)
def init_embeddings_db():
@ -140,12 +143,12 @@ def encode_image_base64(filepath: Path) -> tuple[str, str]:
return base64.standard_b64encode(f.read()).decode('utf-8'), media_type
def analyze_document_with_ai(filepath: Path, client: anthropic.Anthropic) -> Dict[str, Any]:
def analyze_document_with_ai(filepath: Path, client: OpenAI) -> Dict[str, Any]:
"""
Use Claude vision to analyze document.
Use K2.5 via Fireworks to analyze document.
Returns: {category, doc_type, date, vendor, amount, summary, full_text}
"""
print(f" Analyzing with AI...")
print(f" Analyzing with K2.5...")
try:
image_data, media_type = encode_image_base64(filepath)
@ -185,7 +188,7 @@ def analyze_document_with_ai(filepath: Path, client: anthropic.Anthropic) -> Dic
5. **Summary**: 1-2 sentence description of what this document is.
Respond in JSON format:
Respond in JSON format ONLY (no markdown, no explanation):
{
"category": "...",
"doc_type": "...",
@ -197,20 +200,19 @@ Respond in JSON format:
}"""
try:
response = client.messages.create(
model="claude-sonnet-4-20250514",
max_tokens=4096,
# K2.5 via Fireworks using OpenAI-compatible API
response = client.chat.completions.create(
model="accounts/fireworks/models/k2-5-kimi-vision",
max_tokens=8192,
messages=[
{
"role": "user",
"content": [
{
"type": "image",
"source": {
"type": "base64",
"media_type": media_type,
"data": image_data,
},
"type": "image_url",
"image_url": {
"url": f"data:{media_type};base64,{image_data}"
}
},
{
"type": "text",
@ -222,7 +224,7 @@ Respond in JSON format:
)
# Parse JSON from response
text = response.content[0].text
text = response.choices[0].message.content
# Try to extract JSON from response (handle markdown code blocks)
if "```json" in text:
@ -257,13 +259,12 @@ Respond in JSON format:
}
def generate_embedding(text: str, client: anthropic.Anthropic) -> Optional[List[float]]:
def generate_embedding(text: str, client: OpenAI) -> Optional[List[float]]:
"""
Generate text embedding using Anthropic's embedding endpoint.
Note: As of 2024, Anthropic doesn't have a public embedding API.
This is a placeholder - implement with OpenAI, Voyage, or local model.
Generate text embedding.
For now, returns None and we'll use full-text search in SQLite.
Can implement with OpenAI, Voyage, or local model later.
"""
# TODO: Implement with preferred embedding provider
# Options:
@ -438,7 +439,7 @@ def export_expense(hash_id: str, classification: Dict, filepath: Path) -> None:
])
def process_document(filepath: Path, client: anthropic.Anthropic) -> bool:
def process_document(filepath: Path, client: OpenAI) -> bool:
"""Process a single document through the full pipeline."""
print(f"Processing: {filepath.name}")
@ -501,7 +502,7 @@ def process_document(filepath: Path, client: anthropic.Anthropic) -> bool:
return True
def process_inbox(client: anthropic.Anthropic) -> int:
def process_inbox(client: OpenAI) -> int:
"""Process all documents in inbox. Returns count processed."""
count = 0
for filepath in sorted(INBOX.iterdir()):
@ -516,7 +517,7 @@ def process_inbox(client: anthropic.Anthropic) -> int:
return count
def watch_inbox(client: anthropic.Anthropic, interval: int = 60) -> None:
def watch_inbox(client: OpenAI, interval: int = 60) -> None:
"""Watch inbox continuously."""
print(f"Watching {INBOX} (interval: {interval}s)")
print("Press Ctrl+C to stop")
@ -539,7 +540,7 @@ def main():
init_embeddings_db()
try:
client = get_anthropic_client()
client = get_fireworks_client()
except RuntimeError as e:
print(f"ERROR: {e}")
sys.exit(1)