Switch from Claude to K2.5 via Fireworks (cheaper)
- Use Fireworks API (OpenAI-compatible) instead of Anthropic
- Model: accounts/fireworks/models/k2-5-kimi-vision
- ~10% of the cost of Claude for document extraction
- Update .env to use FIREWORKS_API_KEY
This commit is contained in:
parent
fb3d5a46b5
commit
1e51f0d518
16
README.md
16
README.md
|
|
@ -1,10 +1,10 @@
|
|||
# Document Processor
|
||||
|
||||
AI-powered document management system using Claude vision for extraction and SQLite for storage/search.
|
||||
AI-powered document management system using K2.5 (via Fireworks) for extraction and SQLite for storage/search.
|
||||
|
||||
## Features
|
||||
|
||||
- **AI Vision Analysis**: Uses Claude to read documents, extract text, classify, and summarize
|
||||
- **AI Vision Analysis**: Uses K2.5 (Kimi via Fireworks) to read documents, extract text, classify, and summarize
|
||||
- **No OCR dependencies**: Just drop files in inbox, AI handles the rest
|
||||
- **SQLite Storage**: Full-text search via SQLite, embeddings ready (placeholder)
|
||||
- **Auto-categorization**: Taxes, bills, medical, insurance, legal, financial, etc.
|
||||
|
|
@ -20,14 +20,14 @@ python3 -m venv venv
|
|||
source venv/bin/activate
|
||||
|
||||
# Install dependencies
|
||||
pip install anthropic
|
||||
pip install openai
|
||||
|
||||
# Configure API key (one of these methods):
|
||||
# Option 1: Environment variable
|
||||
export ANTHROPIC_API_KEY=sk-ant-...
|
||||
export FIREWORKS_API_KEY=...
|
||||
|
||||
# Option 2: .env file
|
||||
echo 'ANTHROPIC_API_KEY=sk-ant-...' > .env
|
||||
echo 'FIREWORKS_API_KEY=...' > .env
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
|
@ -99,14 +99,14 @@ journalctl --user -u doc-processor -f
|
|||
## Requirements
|
||||
|
||||
- Python 3.10+
|
||||
- `anthropic` Python package
|
||||
- `openai` Python package (for Fireworks API)
|
||||
- `pdftoppm` (poppler-utils) for PDF conversion
|
||||
- Anthropic API key
|
||||
- Fireworks API key
|
||||
|
||||
## API Key
|
||||
|
||||
The processor looks for the API key in this order:
|
||||
1. `ANTHROPIC_API_KEY` environment variable
|
||||
1. `FIREWORKS_API_KEY` environment variable
|
||||
2. `~/dev/doc-processor/.env` file
|
||||
|
||||
## Embeddings
|
||||
|
|
|
|||
71
processor.py
71
processor.py
|
|
@ -1,7 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Document Processor for ~/documents/inbox/
|
||||
Uses AI vision (Claude) for document analysis. Stores embeddings in SQLite.
|
||||
Uses AI vision (K2.5 via Fireworks) for document analysis. Stores embeddings in SQLite.
|
||||
"""
|
||||
|
||||
import os
|
||||
|
|
@ -19,12 +19,12 @@ from typing import Optional, Dict, Any, List
|
|||
import time
|
||||
import argparse
|
||||
|
||||
# Try to import anthropic, fail gracefully with helpful message
|
||||
# Try to import openai (used for Fireworks API), fail gracefully
|
||||
try:
|
||||
import anthropic
|
||||
from openai import OpenAI
|
||||
except ImportError:
|
||||
print("ERROR: anthropic package not installed")
|
||||
print("Run: cd ~/dev/doc-processor && source venv/bin/activate && pip install anthropic")
|
||||
print("ERROR: openai package not installed")
|
||||
print("Run: cd ~/dev/doc-processor && source venv/bin/activate && pip install openai")
|
||||
sys.exit(1)
|
||||
|
||||
# Paths
|
||||
|
|
@ -50,26 +50,29 @@ for cat in CATEGORIES:
|
|||
(RECORDS / cat).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
def get_anthropic_client() -> anthropic.Anthropic:
|
||||
"""Get Anthropic client, checking for API key."""
|
||||
api_key = os.environ.get("ANTHROPIC_API_KEY")
|
||||
def get_fireworks_client() -> OpenAI:
|
||||
"""Get Fireworks client (OpenAI-compatible), checking for API key."""
|
||||
api_key = os.environ.get("FIREWORKS_API_KEY")
|
||||
if not api_key:
|
||||
# Try reading from config file
|
||||
config_path = Path.home() / "dev/doc-processor/.env"
|
||||
if config_path.exists():
|
||||
for line in config_path.read_text().splitlines():
|
||||
if line.startswith("ANTHROPIC_API_KEY="):
|
||||
if line.startswith("FIREWORKS_API_KEY="):
|
||||
api_key = line.split("=", 1)[1].strip().strip('"\'')
|
||||
break
|
||||
|
||||
if not api_key:
|
||||
raise RuntimeError(
|
||||
"ANTHROPIC_API_KEY not set. Either:\n"
|
||||
" 1. Set ANTHROPIC_API_KEY environment variable\n"
|
||||
" 2. Create ~/dev/doc-processor/.env with ANTHROPIC_API_KEY=sk-ant-..."
|
||||
"FIREWORKS_API_KEY not set. Either:\n"
|
||||
" 1. Set FIREWORKS_API_KEY environment variable\n"
|
||||
" 2. Create ~/dev/doc-processor/.env with FIREWORKS_API_KEY=..."
|
||||
)
|
||||
|
||||
return anthropic.Anthropic(api_key=api_key)
|
||||
return OpenAI(
|
||||
api_key=api_key,
|
||||
base_url="https://api.fireworks.ai/inference/v1"
|
||||
)
|
||||
|
||||
|
||||
def init_embeddings_db():
|
||||
|
|
@ -140,12 +143,12 @@ def encode_image_base64(filepath: Path) -> tuple[str, str]:
|
|||
return base64.standard_b64encode(f.read()).decode('utf-8'), media_type
|
||||
|
||||
|
||||
def analyze_document_with_ai(filepath: Path, client: anthropic.Anthropic) -> Dict[str, Any]:
|
||||
def analyze_document_with_ai(filepath: Path, client: OpenAI) -> Dict[str, Any]:
|
||||
"""
|
||||
Use Claude vision to analyze document.
|
||||
Use K2.5 via Fireworks to analyze document.
|
||||
Returns: {category, doc_type, date, vendor, amount, summary, full_text}
|
||||
"""
|
||||
print(f" Analyzing with AI...")
|
||||
print(f" Analyzing with K2.5...")
|
||||
|
||||
try:
|
||||
image_data, media_type = encode_image_base64(filepath)
|
||||
|
|
@ -185,7 +188,7 @@ def analyze_document_with_ai(filepath: Path, client: anthropic.Anthropic) -> Dic
|
|||
|
||||
5. **Summary**: 1-2 sentence description of what this document is.
|
||||
|
||||
Respond in JSON format:
|
||||
Respond in JSON format ONLY (no markdown, no explanation):
|
||||
{
|
||||
"category": "...",
|
||||
"doc_type": "...",
|
||||
|
|
@ -197,20 +200,19 @@ Respond in JSON format:
|
|||
}"""
|
||||
|
||||
try:
|
||||
response = client.messages.create(
|
||||
model="claude-sonnet-4-20250514",
|
||||
max_tokens=4096,
|
||||
# K2.5 via Fireworks using OpenAI-compatible API
|
||||
response = client.chat.completions.create(
|
||||
model="accounts/fireworks/models/k2-5-kimi-vision",
|
||||
max_tokens=8192,
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "image",
|
||||
"source": {
|
||||
"type": "base64",
|
||||
"media_type": media_type,
|
||||
"data": image_data,
|
||||
},
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": f"data:{media_type};base64,{image_data}"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
|
|
@ -222,7 +224,7 @@ Respond in JSON format:
|
|||
)
|
||||
|
||||
# Parse JSON from response
|
||||
text = response.content[0].text
|
||||
text = response.choices[0].message.content
|
||||
|
||||
# Try to extract JSON from response (handle markdown code blocks)
|
||||
if "```json" in text:
|
||||
|
|
@ -257,13 +259,12 @@ Respond in JSON format:
|
|||
}
|
||||
|
||||
|
||||
def generate_embedding(text: str, client: anthropic.Anthropic) -> Optional[List[float]]:
|
||||
def generate_embedding(text: str, client: OpenAI) -> Optional[List[float]]:
|
||||
"""
|
||||
Generate text embedding using Anthropic's embedding endpoint.
|
||||
Note: As of 2024, Anthropic doesn't have a public embedding API.
|
||||
This is a placeholder - implement with OpenAI, Voyage, or local model.
|
||||
Generate text embedding.
|
||||
|
||||
For now, returns None and we'll use full-text search in SQLite.
|
||||
Can implement with OpenAI, Voyage, or local model later.
|
||||
"""
|
||||
# TODO: Implement with preferred embedding provider
|
||||
# Options:
|
||||
|
|
@ -438,7 +439,7 @@ def export_expense(hash_id: str, classification: Dict, filepath: Path) -> None:
|
|||
])
|
||||
|
||||
|
||||
def process_document(filepath: Path, client: anthropic.Anthropic) -> bool:
|
||||
def process_document(filepath: Path, client: OpenAI) -> bool:
|
||||
"""Process a single document through the full pipeline."""
|
||||
print(f"Processing: {filepath.name}")
|
||||
|
||||
|
|
@ -501,7 +502,7 @@ def process_document(filepath: Path, client: anthropic.Anthropic) -> bool:
|
|||
return True
|
||||
|
||||
|
||||
def process_inbox(client: anthropic.Anthropic) -> int:
|
||||
def process_inbox(client: OpenAI) -> int:
|
||||
"""Process all documents in inbox. Returns count processed."""
|
||||
count = 0
|
||||
for filepath in sorted(INBOX.iterdir()):
|
||||
|
|
@ -516,7 +517,7 @@ def process_inbox(client: anthropic.Anthropic) -> int:
|
|||
return count
|
||||
|
||||
|
||||
def watch_inbox(client: anthropic.Anthropic, interval: int = 60) -> None:
|
||||
def watch_inbox(client: OpenAI, interval: int = 60) -> None:
|
||||
"""Watch inbox continuously."""
|
||||
print(f"Watching {INBOX} (interval: {interval}s)")
|
||||
print("Press Ctrl+C to stop")
|
||||
|
|
@ -539,7 +540,7 @@ def main():
|
|||
init_embeddings_db()
|
||||
|
||||
try:
|
||||
client = get_anthropic_client()
|
||||
client = get_fireworks_client()
|
||||
except RuntimeError as e:
|
||||
print(f"ERROR: {e}")
|
||||
sys.exit(1)
|
||||
|
|
|
|||
Loading…
Reference in New Issue