Switch from Claude to K2.5 via Fireworks (cheaper)

- Use Fireworks API (OpenAI-compatible) instead of Anthropic
- Model: accounts/fireworks/models/k2-5-kimi-vision
- ~10% of Claude's cost for document extraction
- Update .env to use FIREWORKS_API_KEY
This commit is contained in:
Johan Jongsma 2026-02-01 17:44:18 +00:00
parent fb3d5a46b5
commit 1e51f0d518
2 changed files with 45 additions and 44 deletions

View File

@ -1,10 +1,10 @@
# Document Processor
AI-powered document management system using Claude vision for extraction and SQLite for storage/search.
AI-powered document management system using K2.5 (via Fireworks) for extraction and SQLite for storage/search.
## Features
- **AI Vision Analysis**: Uses Claude to read documents, extract text, classify, and summarize
- **AI Vision Analysis**: Uses K2.5 (Kimi via Fireworks) to read documents, extract text, classify, and summarize
- **No OCR dependencies**: Just drop files in inbox, AI handles the rest
- **SQLite Storage**: Full-text search via SQLite, embeddings ready (placeholder)
- **Auto-categorization**: Taxes, bills, medical, insurance, legal, financial, etc.
@ -20,14 +20,14 @@ python3 -m venv venv
source venv/bin/activate
# Install dependencies
pip install anthropic
pip install openai
# Configure API key (one of these methods):
# Option 1: Environment variable
export ANTHROPIC_API_KEY=sk-ant-...
export FIREWORKS_API_KEY=...
# Option 2: .env file
echo 'ANTHROPIC_API_KEY=sk-ant-...' > .env
echo 'FIREWORKS_API_KEY=...' > .env
```
## Usage
@ -99,14 +99,14 @@ journalctl --user -u doc-processor -f
## Requirements
- Python 3.10+
- `anthropic` Python package
- `openai` Python package (for Fireworks API)
- `pdftoppm` (poppler-utils) for PDF conversion
- Anthropic API key
- Fireworks API key
## API Key
The processor looks for the API key in this order:
1. `ANTHROPIC_API_KEY` environment variable
1. `FIREWORKS_API_KEY` environment variable
2. `~/dev/doc-processor/.env` file
## Embeddings

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python3
"""
Document Processor for ~/documents/inbox/
Uses AI vision (Claude) for document analysis. Stores embeddings in SQLite.
Uses AI vision (K2.5 via Fireworks) for document analysis. Stores embeddings in SQLite.
"""
import os
@ -19,12 +19,12 @@ from typing import Optional, Dict, Any, List
import time
import argparse
# Try to import anthropic, fail gracefully with helpful message
# Try to import openai (used for Fireworks API), fail gracefully
try:
import anthropic
from openai import OpenAI
except ImportError:
print("ERROR: anthropic package not installed")
print("Run: cd ~/dev/doc-processor && source venv/bin/activate && pip install anthropic")
print("ERROR: openai package not installed")
print("Run: cd ~/dev/doc-processor && source venv/bin/activate && pip install openai")
sys.exit(1)
# Paths
@ -50,26 +50,29 @@ for cat in CATEGORIES:
(RECORDS / cat).mkdir(parents=True, exist_ok=True)
def get_anthropic_client() -> anthropic.Anthropic:
"""Get Anthropic client, checking for API key."""
api_key = os.environ.get("ANTHROPIC_API_KEY")
def get_fireworks_client() -> OpenAI:
"""Get Fireworks client (OpenAI-compatible), checking for API key."""
api_key = os.environ.get("FIREWORKS_API_KEY")
if not api_key:
# Try reading from config file
config_path = Path.home() / "dev/doc-processor/.env"
if config_path.exists():
for line in config_path.read_text().splitlines():
if line.startswith("ANTHROPIC_API_KEY="):
if line.startswith("FIREWORKS_API_KEY="):
api_key = line.split("=", 1)[1].strip().strip('"\'')
break
if not api_key:
raise RuntimeError(
"ANTHROPIC_API_KEY not set. Either:\n"
" 1. Set ANTHROPIC_API_KEY environment variable\n"
" 2. Create ~/dev/doc-processor/.env with ANTHROPIC_API_KEY=sk-ant-..."
"FIREWORKS_API_KEY not set. Either:\n"
" 1. Set FIREWORKS_API_KEY environment variable\n"
" 2. Create ~/dev/doc-processor/.env with FIREWORKS_API_KEY=..."
)
return anthropic.Anthropic(api_key=api_key)
return OpenAI(
api_key=api_key,
base_url="https://api.fireworks.ai/inference/v1"
)
def init_embeddings_db():
@ -140,12 +143,12 @@ def encode_image_base64(filepath: Path) -> tuple[str, str]:
return base64.standard_b64encode(f.read()).decode('utf-8'), media_type
def analyze_document_with_ai(filepath: Path, client: anthropic.Anthropic) -> Dict[str, Any]:
def analyze_document_with_ai(filepath: Path, client: OpenAI) -> Dict[str, Any]:
"""
Use Claude vision to analyze document.
Use K2.5 via Fireworks to analyze document.
Returns: {category, doc_type, date, vendor, amount, summary, full_text}
"""
print(f" Analyzing with AI...")
print(f" Analyzing with K2.5...")
try:
image_data, media_type = encode_image_base64(filepath)
@ -185,7 +188,7 @@ def analyze_document_with_ai(filepath: Path, client: anthropic.Anthropic) -> Dic
5. **Summary**: 1-2 sentence description of what this document is.
Respond in JSON format:
Respond in JSON format ONLY (no markdown, no explanation):
{
"category": "...",
"doc_type": "...",
@ -197,20 +200,19 @@ Respond in JSON format:
}"""
try:
response = client.messages.create(
model="claude-sonnet-4-20250514",
max_tokens=4096,
# K2.5 via Fireworks using OpenAI-compatible API
response = client.chat.completions.create(
model="accounts/fireworks/models/k2-5-kimi-vision",
max_tokens=8192,
messages=[
{
"role": "user",
"content": [
{
"type": "image",
"source": {
"type": "base64",
"media_type": media_type,
"data": image_data,
},
"type": "image_url",
"image_url": {
"url": f"data:{media_type};base64,{image_data}"
}
},
{
"type": "text",
@ -222,7 +224,7 @@ Respond in JSON format:
)
# Parse JSON from response
text = response.content[0].text
text = response.choices[0].message.content
# Try to extract JSON from response (handle markdown code blocks)
if "```json" in text:
@ -257,13 +259,12 @@ Respond in JSON format:
}
def generate_embedding(text: str, client: anthropic.Anthropic) -> Optional[List[float]]:
def generate_embedding(text: str, client: OpenAI) -> Optional[List[float]]:
"""
Generate text embedding using Anthropic's embedding endpoint.
Note: As of 2024, Anthropic doesn't have a public embedding API.
This is a placeholder - implement with OpenAI, Voyage, or local model.
Generate text embedding.
For now, returns None and we'll use full-text search in SQLite.
Can implement with OpenAI, Voyage, or local model later.
"""
# TODO: Implement with preferred embedding provider
# Options:
@ -438,7 +439,7 @@ def export_expense(hash_id: str, classification: Dict, filepath: Path) -> None:
])
def process_document(filepath: Path, client: anthropic.Anthropic) -> bool:
def process_document(filepath: Path, client: OpenAI) -> bool:
"""Process a single document through the full pipeline."""
print(f"Processing: {filepath.name}")
@ -501,7 +502,7 @@ def process_document(filepath: Path, client: anthropic.Anthropic) -> bool:
return True
def process_inbox(client: anthropic.Anthropic) -> int:
def process_inbox(client: OpenAI) -> int:
"""Process all documents in inbox. Returns count processed."""
count = 0
for filepath in sorted(INBOX.iterdir()):
@ -516,7 +517,7 @@ def process_inbox(client: anthropic.Anthropic) -> int:
return count
def watch_inbox(client: anthropic.Anthropic, interval: int = 60) -> None:
def watch_inbox(client: OpenAI, interval: int = 60) -> None:
"""Watch inbox continuously."""
print(f"Watching {INBOX} (interval: {interval}s)")
print("Press Ctrl+C to stop")
@ -539,7 +540,7 @@ def main():
init_embeddings_db()
try:
client = get_anthropic_client()
client = get_fireworks_client()
except RuntimeError as e:
print(f"ERROR: {e}")
sys.exit(1)