Switch from Claude to K2.5 via Fireworks (cheaper)

- Use Fireworks API (OpenAI-compatible) instead of Anthropic
- Model: accounts/fireworks/models/k2-5-kimi-vision
- Costs roughly 10% of Claude for document extraction
- Update .env to use FIREWORKS_API_KEY
This commit is contained in:
Johan Jongsma 2026-02-01 17:44:18 +00:00
parent fb3d5a46b5
commit 1e51f0d518
2 changed files with 45 additions and 44 deletions

View File

@ -1,10 +1,10 @@
# Document Processor # Document Processor
AI-powered document management system using Claude vision for extraction and SQLite for storage/search. AI-powered document management system using K2.5 (via Fireworks) for extraction and SQLite for storage/search.
## Features ## Features
- **AI Vision Analysis**: Uses Claude to read documents, extract text, classify, and summarize - **AI Vision Analysis**: Uses K2.5 (Kimi via Fireworks) to read documents, extract text, classify, and summarize
- **No OCR dependencies**: Just drop files in inbox, AI handles the rest - **No OCR dependencies**: Just drop files in inbox, AI handles the rest
- **SQLite Storage**: Full-text search via SQLite, embeddings ready (placeholder) - **SQLite Storage**: Full-text search via SQLite, embeddings ready (placeholder)
- **Auto-categorization**: Taxes, bills, medical, insurance, legal, financial, etc. - **Auto-categorization**: Taxes, bills, medical, insurance, legal, financial, etc.
@ -20,14 +20,14 @@ python3 -m venv venv
source venv/bin/activate source venv/bin/activate
# Install dependencies # Install dependencies
pip install anthropic pip install openai
# Configure API key (one of these methods): # Configure API key (one of these methods):
# Option 1: Environment variable # Option 1: Environment variable
export ANTHROPIC_API_KEY=sk-ant-... export FIREWORKS_API_KEY=...
# Option 2: .env file # Option 2: .env file
echo 'ANTHROPIC_API_KEY=sk-ant-...' > .env echo 'FIREWORKS_API_KEY=...' > .env
``` ```
## Usage ## Usage
@ -99,14 +99,14 @@ journalctl --user -u doc-processor -f
## Requirements ## Requirements
- Python 3.10+ - Python 3.10+
- `anthropic` Python package - `openai` Python package (for Fireworks API)
- `pdftoppm` (poppler-utils) for PDF conversion - `pdftoppm` (poppler-utils) for PDF conversion
- Anthropic API key - Fireworks API key
## API Key ## API Key
The processor looks for the API key in this order: The processor looks for the API key in this order:
1. `ANTHROPIC_API_KEY` environment variable 1. `FIREWORKS_API_KEY` environment variable
2. `~/dev/doc-processor/.env` file 2. `~/dev/doc-processor/.env` file
## Embeddings ## Embeddings

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
""" """
Document Processor for ~/documents/inbox/ Document Processor for ~/documents/inbox/
Uses AI vision (Claude) for document analysis. Stores embeddings in SQLite. Uses AI vision (K2.5 via Fireworks) for document analysis. Stores embeddings in SQLite.
""" """
import os import os
@ -19,12 +19,12 @@ from typing import Optional, Dict, Any, List
import time import time
import argparse import argparse
# Try to import anthropic, fail gracefully with helpful message # Try to import openai (used for Fireworks API), fail gracefully
try: try:
import anthropic from openai import OpenAI
except ImportError: except ImportError:
print("ERROR: anthropic package not installed") print("ERROR: openai package not installed")
print("Run: cd ~/dev/doc-processor && source venv/bin/activate && pip install anthropic") print("Run: cd ~/dev/doc-processor && source venv/bin/activate && pip install openai")
sys.exit(1) sys.exit(1)
# Paths # Paths
@ -50,26 +50,29 @@ for cat in CATEGORIES:
(RECORDS / cat).mkdir(parents=True, exist_ok=True) (RECORDS / cat).mkdir(parents=True, exist_ok=True)
def get_anthropic_client() -> anthropic.Anthropic: def get_fireworks_client() -> OpenAI:
"""Get Anthropic client, checking for API key.""" """Get Fireworks client (OpenAI-compatible), checking for API key."""
api_key = os.environ.get("ANTHROPIC_API_KEY") api_key = os.environ.get("FIREWORKS_API_KEY")
if not api_key: if not api_key:
# Try reading from config file # Try reading from config file
config_path = Path.home() / "dev/doc-processor/.env" config_path = Path.home() / "dev/doc-processor/.env"
if config_path.exists(): if config_path.exists():
for line in config_path.read_text().splitlines(): for line in config_path.read_text().splitlines():
if line.startswith("ANTHROPIC_API_KEY="): if line.startswith("FIREWORKS_API_KEY="):
api_key = line.split("=", 1)[1].strip().strip('"\'') api_key = line.split("=", 1)[1].strip().strip('"\'')
break break
if not api_key: if not api_key:
raise RuntimeError( raise RuntimeError(
"ANTHROPIC_API_KEY not set. Either:\n" "FIREWORKS_API_KEY not set. Either:\n"
" 1. Set ANTHROPIC_API_KEY environment variable\n" " 1. Set FIREWORKS_API_KEY environment variable\n"
" 2. Create ~/dev/doc-processor/.env with ANTHROPIC_API_KEY=sk-ant-..." " 2. Create ~/dev/doc-processor/.env with FIREWORKS_API_KEY=..."
) )
return anthropic.Anthropic(api_key=api_key) return OpenAI(
api_key=api_key,
base_url="https://api.fireworks.ai/inference/v1"
)
def init_embeddings_db(): def init_embeddings_db():
@ -140,12 +143,12 @@ def encode_image_base64(filepath: Path) -> tuple[str, str]:
return base64.standard_b64encode(f.read()).decode('utf-8'), media_type return base64.standard_b64encode(f.read()).decode('utf-8'), media_type
def analyze_document_with_ai(filepath: Path, client: anthropic.Anthropic) -> Dict[str, Any]: def analyze_document_with_ai(filepath: Path, client: OpenAI) -> Dict[str, Any]:
""" """
Use Claude vision to analyze document. Use K2.5 via Fireworks to analyze document.
Returns: {category, doc_type, date, vendor, amount, summary, full_text} Returns: {category, doc_type, date, vendor, amount, summary, full_text}
""" """
print(f" Analyzing with AI...") print(f" Analyzing with K2.5...")
try: try:
image_data, media_type = encode_image_base64(filepath) image_data, media_type = encode_image_base64(filepath)
@ -185,7 +188,7 @@ def analyze_document_with_ai(filepath: Path, client: anthropic.Anthropic) -> Dic
5. **Summary**: 1-2 sentence description of what this document is. 5. **Summary**: 1-2 sentence description of what this document is.
Respond in JSON format: Respond in JSON format ONLY (no markdown, no explanation):
{ {
"category": "...", "category": "...",
"doc_type": "...", "doc_type": "...",
@ -197,20 +200,19 @@ Respond in JSON format:
}""" }"""
try: try:
response = client.messages.create( # K2.5 via Fireworks using OpenAI-compatible API
model="claude-sonnet-4-20250514", response = client.chat.completions.create(
max_tokens=4096, model="accounts/fireworks/models/k2-5-kimi-vision",
max_tokens=8192,
messages=[ messages=[
{ {
"role": "user", "role": "user",
"content": [ "content": [
{ {
"type": "image", "type": "image_url",
"source": { "image_url": {
"type": "base64", "url": f"data:{media_type};base64,{image_data}"
"media_type": media_type, }
"data": image_data,
},
}, },
{ {
"type": "text", "type": "text",
@ -222,7 +224,7 @@ Respond in JSON format:
) )
# Parse JSON from response # Parse JSON from response
text = response.content[0].text text = response.choices[0].message.content
# Try to extract JSON from response (handle markdown code blocks) # Try to extract JSON from response (handle markdown code blocks)
if "```json" in text: if "```json" in text:
@ -257,18 +259,17 @@ Respond in JSON format:
} }
def generate_embedding(text: str, client: anthropic.Anthropic) -> Optional[List[float]]: def generate_embedding(text: str, client: OpenAI) -> Optional[List[float]]:
""" """
Generate text embedding using Anthropic's embedding endpoint. Generate text embedding.
Note: As of 2024, Anthropic doesn't have a public embedding API.
This is a placeholder - implement with OpenAI, Voyage, or local model.
For now, returns None and we'll use full-text search in SQLite. For now, returns None and we'll use full-text search in SQLite.
Can implement with OpenAI, Voyage, or local model later.
""" """
# TODO: Implement with preferred embedding provider # TODO: Implement with preferred embedding provider
# Options: # Options:
# 1. OpenAI text-embedding-3-small (cheap, good quality) # 1. OpenAI text-embedding-3-small (cheap, good quality)
# 2. Voyage AI (good for documents) # 2. Voyage AI (good for documents)
# 3. Local sentence-transformers # 3. Local sentence-transformers
return None return None
@ -438,7 +439,7 @@ def export_expense(hash_id: str, classification: Dict, filepath: Path) -> None:
]) ])
def process_document(filepath: Path, client: anthropic.Anthropic) -> bool: def process_document(filepath: Path, client: OpenAI) -> bool:
"""Process a single document through the full pipeline.""" """Process a single document through the full pipeline."""
print(f"Processing: {filepath.name}") print(f"Processing: {filepath.name}")
@ -501,7 +502,7 @@ def process_document(filepath: Path, client: anthropic.Anthropic) -> bool:
return True return True
def process_inbox(client: anthropic.Anthropic) -> int: def process_inbox(client: OpenAI) -> int:
"""Process all documents in inbox. Returns count processed.""" """Process all documents in inbox. Returns count processed."""
count = 0 count = 0
for filepath in sorted(INBOX.iterdir()): for filepath in sorted(INBOX.iterdir()):
@ -516,7 +517,7 @@ def process_inbox(client: anthropic.Anthropic) -> int:
return count return count
def watch_inbox(client: anthropic.Anthropic, interval: int = 60) -> None: def watch_inbox(client: OpenAI, interval: int = 60) -> None:
"""Watch inbox continuously.""" """Watch inbox continuously."""
print(f"Watching {INBOX} (interval: {interval}s)") print(f"Watching {INBOX} (interval: {interval}s)")
print("Press Ctrl+C to stop") print("Press Ctrl+C to stop")
@ -539,7 +540,7 @@ def main():
init_embeddings_db() init_embeddings_db()
try: try:
client = get_anthropic_client() client = get_fireworks_client()
except RuntimeError as e: except RuntimeError as e:
print(f"ERROR: {e}") print(f"ERROR: {e}")
sys.exit(1) sys.exit(1)