Switch from Claude to K2.5 via Fireworks (cheaper)
- Use Fireworks API (OpenAI-compatible) instead of Anthropic
- Model: accounts/fireworks/models/k2-5-kimi-vision
- ~10% cost of Claude for document extraction
- Update .env to use FIREWORKS_API_KEY
This commit is contained in:
parent
fb3d5a46b5
commit
1e51f0d518
16
README.md
16
README.md
|
|
@ -1,10 +1,10 @@
|
||||||
# Document Processor
|
# Document Processor
|
||||||
|
|
||||||
AI-powered document management system using Claude vision for extraction and SQLite for storage/search.
|
AI-powered document management system using K2.5 (via Fireworks) for extraction and SQLite for storage/search.
|
||||||
|
|
||||||
## Features
|
## Features
|
||||||
|
|
||||||
- **AI Vision Analysis**: Uses Claude to read documents, extract text, classify, and summarize
|
- **AI Vision Analysis**: Uses K2.5 (Kimi via Fireworks) to read documents, extract text, classify, and summarize
|
||||||
- **No OCR dependencies**: Just drop files in inbox, AI handles the rest
|
- **No OCR dependencies**: Just drop files in inbox, AI handles the rest
|
||||||
- **SQLite Storage**: Full-text search via SQLite, embeddings ready (placeholder)
|
- **SQLite Storage**: Full-text search via SQLite, embeddings ready (placeholder)
|
||||||
- **Auto-categorization**: Taxes, bills, medical, insurance, legal, financial, etc.
|
- **Auto-categorization**: Taxes, bills, medical, insurance, legal, financial, etc.
|
||||||
|
|
@ -20,14 +20,14 @@ python3 -m venv venv
|
||||||
source venv/bin/activate
|
source venv/bin/activate
|
||||||
|
|
||||||
# Install dependencies
|
# Install dependencies
|
||||||
pip install anthropic
|
pip install openai
|
||||||
|
|
||||||
# Configure API key (one of these methods):
|
# Configure API key (one of these methods):
|
||||||
# Option 1: Environment variable
|
# Option 1: Environment variable
|
||||||
export ANTHROPIC_API_KEY=sk-ant-...
|
export FIREWORKS_API_KEY=...
|
||||||
|
|
||||||
# Option 2: .env file
|
# Option 2: .env file
|
||||||
echo 'ANTHROPIC_API_KEY=sk-ant-...' > .env
|
echo 'FIREWORKS_API_KEY=...' > .env
|
||||||
```
|
```
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
@ -99,14 +99,14 @@ journalctl --user -u doc-processor -f
|
||||||
## Requirements
|
## Requirements
|
||||||
|
|
||||||
- Python 3.10+
|
- Python 3.10+
|
||||||
- `anthropic` Python package
|
- `openai` Python package (for Fireworks API)
|
||||||
- `pdftoppm` (poppler-utils) for PDF conversion
|
- `pdftoppm` (poppler-utils) for PDF conversion
|
||||||
- Anthropic API key
|
- Fireworks API key
|
||||||
|
|
||||||
## API Key
|
## API Key
|
||||||
|
|
||||||
The processor looks for the API key in this order:
|
The processor looks for the API key in this order:
|
||||||
1. `ANTHROPIC_API_KEY` environment variable
|
1. `FIREWORKS_API_KEY` environment variable
|
||||||
2. `~/dev/doc-processor/.env` file
|
2. `~/dev/doc-processor/.env` file
|
||||||
|
|
||||||
## Embeddings
|
## Embeddings
|
||||||
|
|
|
||||||
71
processor.py
71
processor.py
|
|
@ -1,7 +1,7 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""
|
"""
|
||||||
Document Processor for ~/documents/inbox/
|
Document Processor for ~/documents/inbox/
|
||||||
Uses AI vision (Claude) for document analysis. Stores embeddings in SQLite.
|
Uses AI vision (K2.5 via Fireworks) for document analysis. Stores embeddings in SQLite.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
|
@ -19,12 +19,12 @@ from typing import Optional, Dict, Any, List
|
||||||
import time
|
import time
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
# Try to import anthropic, fail gracefully with helpful message
|
# Try to import openai (used for Fireworks API), fail gracefully
|
||||||
try:
|
try:
|
||||||
import anthropic
|
from openai import OpenAI
|
||||||
except ImportError:
|
except ImportError:
|
||||||
print("ERROR: anthropic package not installed")
|
print("ERROR: openai package not installed")
|
||||||
print("Run: cd ~/dev/doc-processor && source venv/bin/activate && pip install anthropic")
|
print("Run: cd ~/dev/doc-processor && source venv/bin/activate && pip install openai")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
# Paths
|
# Paths
|
||||||
|
|
@ -50,26 +50,29 @@ for cat in CATEGORIES:
|
||||||
(RECORDS / cat).mkdir(parents=True, exist_ok=True)
|
(RECORDS / cat).mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
def get_anthropic_client() -> anthropic.Anthropic:
|
def get_fireworks_client() -> OpenAI:
|
||||||
"""Get Anthropic client, checking for API key."""
|
"""Get Fireworks client (OpenAI-compatible), checking for API key."""
|
||||||
api_key = os.environ.get("ANTHROPIC_API_KEY")
|
api_key = os.environ.get("FIREWORKS_API_KEY")
|
||||||
if not api_key:
|
if not api_key:
|
||||||
# Try reading from config file
|
# Try reading from config file
|
||||||
config_path = Path.home() / "dev/doc-processor/.env"
|
config_path = Path.home() / "dev/doc-processor/.env"
|
||||||
if config_path.exists():
|
if config_path.exists():
|
||||||
for line in config_path.read_text().splitlines():
|
for line in config_path.read_text().splitlines():
|
||||||
if line.startswith("ANTHROPIC_API_KEY="):
|
if line.startswith("FIREWORKS_API_KEY="):
|
||||||
api_key = line.split("=", 1)[1].strip().strip('"\'')
|
api_key = line.split("=", 1)[1].strip().strip('"\'')
|
||||||
break
|
break
|
||||||
|
|
||||||
if not api_key:
|
if not api_key:
|
||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
"ANTHROPIC_API_KEY not set. Either:\n"
|
"FIREWORKS_API_KEY not set. Either:\n"
|
||||||
" 1. Set ANTHROPIC_API_KEY environment variable\n"
|
" 1. Set FIREWORKS_API_KEY environment variable\n"
|
||||||
" 2. Create ~/dev/doc-processor/.env with ANTHROPIC_API_KEY=sk-ant-..."
|
" 2. Create ~/dev/doc-processor/.env with FIREWORKS_API_KEY=..."
|
||||||
)
|
)
|
||||||
|
|
||||||
return anthropic.Anthropic(api_key=api_key)
|
return OpenAI(
|
||||||
|
api_key=api_key,
|
||||||
|
base_url="https://api.fireworks.ai/inference/v1"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def init_embeddings_db():
|
def init_embeddings_db():
|
||||||
|
|
@ -140,12 +143,12 @@ def encode_image_base64(filepath: Path) -> tuple[str, str]:
|
||||||
return base64.standard_b64encode(f.read()).decode('utf-8'), media_type
|
return base64.standard_b64encode(f.read()).decode('utf-8'), media_type
|
||||||
|
|
||||||
|
|
||||||
def analyze_document_with_ai(filepath: Path, client: anthropic.Anthropic) -> Dict[str, Any]:
|
def analyze_document_with_ai(filepath: Path, client: OpenAI) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Use Claude vision to analyze document.
|
Use K2.5 via Fireworks to analyze document.
|
||||||
Returns: {category, doc_type, date, vendor, amount, summary, full_text}
|
Returns: {category, doc_type, date, vendor, amount, summary, full_text}
|
||||||
"""
|
"""
|
||||||
print(f" Analyzing with AI...")
|
print(f" Analyzing with K2.5...")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
image_data, media_type = encode_image_base64(filepath)
|
image_data, media_type = encode_image_base64(filepath)
|
||||||
|
|
@ -185,7 +188,7 @@ def analyze_document_with_ai(filepath: Path, client: anthropic.Anthropic) -> Dic
|
||||||
|
|
||||||
5. **Summary**: 1-2 sentence description of what this document is.
|
5. **Summary**: 1-2 sentence description of what this document is.
|
||||||
|
|
||||||
Respond in JSON format:
|
Respond in JSON format ONLY (no markdown, no explanation):
|
||||||
{
|
{
|
||||||
"category": "...",
|
"category": "...",
|
||||||
"doc_type": "...",
|
"doc_type": "...",
|
||||||
|
|
@ -197,20 +200,19 @@ Respond in JSON format:
|
||||||
}"""
|
}"""
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = client.messages.create(
|
# K2.5 via Fireworks using OpenAI-compatible API
|
||||||
model="claude-sonnet-4-20250514",
|
response = client.chat.completions.create(
|
||||||
max_tokens=4096,
|
model="accounts/fireworks/models/k2-5-kimi-vision",
|
||||||
|
max_tokens=8192,
|
||||||
messages=[
|
messages=[
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": [
|
"content": [
|
||||||
{
|
{
|
||||||
"type": "image",
|
"type": "image_url",
|
||||||
"source": {
|
"image_url": {
|
||||||
"type": "base64",
|
"url": f"data:{media_type};base64,{image_data}"
|
||||||
"media_type": media_type,
|
}
|
||||||
"data": image_data,
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "text",
|
"type": "text",
|
||||||
|
|
@ -222,7 +224,7 @@ Respond in JSON format:
|
||||||
)
|
)
|
||||||
|
|
||||||
# Parse JSON from response
|
# Parse JSON from response
|
||||||
text = response.content[0].text
|
text = response.choices[0].message.content
|
||||||
|
|
||||||
# Try to extract JSON from response (handle markdown code blocks)
|
# Try to extract JSON from response (handle markdown code blocks)
|
||||||
if "```json" in text:
|
if "```json" in text:
|
||||||
|
|
@ -257,13 +259,12 @@ Respond in JSON format:
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def generate_embedding(text: str, client: anthropic.Anthropic) -> Optional[List[float]]:
|
def generate_embedding(text: str, client: OpenAI) -> Optional[List[float]]:
|
||||||
"""
|
"""
|
||||||
Generate text embedding using Anthropic's embedding endpoint.
|
Generate text embedding.
|
||||||
Note: As of 2024, Anthropic doesn't have a public embedding API.
|
|
||||||
This is a placeholder - implement with OpenAI, Voyage, or local model.
|
|
||||||
|
|
||||||
For now, returns None and we'll use full-text search in SQLite.
|
For now, returns None and we'll use full-text search in SQLite.
|
||||||
|
Can implement with OpenAI, Voyage, or local model later.
|
||||||
"""
|
"""
|
||||||
# TODO: Implement with preferred embedding provider
|
# TODO: Implement with preferred embedding provider
|
||||||
# Options:
|
# Options:
|
||||||
|
|
@ -438,7 +439,7 @@ def export_expense(hash_id: str, classification: Dict, filepath: Path) -> None:
|
||||||
])
|
])
|
||||||
|
|
||||||
|
|
||||||
def process_document(filepath: Path, client: anthropic.Anthropic) -> bool:
|
def process_document(filepath: Path, client: OpenAI) -> bool:
|
||||||
"""Process a single document through the full pipeline."""
|
"""Process a single document through the full pipeline."""
|
||||||
print(f"Processing: {filepath.name}")
|
print(f"Processing: {filepath.name}")
|
||||||
|
|
||||||
|
|
@ -501,7 +502,7 @@ def process_document(filepath: Path, client: anthropic.Anthropic) -> bool:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def process_inbox(client: anthropic.Anthropic) -> int:
|
def process_inbox(client: OpenAI) -> int:
|
||||||
"""Process all documents in inbox. Returns count processed."""
|
"""Process all documents in inbox. Returns count processed."""
|
||||||
count = 0
|
count = 0
|
||||||
for filepath in sorted(INBOX.iterdir()):
|
for filepath in sorted(INBOX.iterdir()):
|
||||||
|
|
@ -516,7 +517,7 @@ def process_inbox(client: anthropic.Anthropic) -> int:
|
||||||
return count
|
return count
|
||||||
|
|
||||||
|
|
||||||
def watch_inbox(client: anthropic.Anthropic, interval: int = 60) -> None:
|
def watch_inbox(client: OpenAI, interval: int = 60) -> None:
|
||||||
"""Watch inbox continuously."""
|
"""Watch inbox continuously."""
|
||||||
print(f"Watching {INBOX} (interval: {interval}s)")
|
print(f"Watching {INBOX} (interval: {interval}s)")
|
||||||
print("Press Ctrl+C to stop")
|
print("Press Ctrl+C to stop")
|
||||||
|
|
@ -539,7 +540,7 @@ def main():
|
||||||
init_embeddings_db()
|
init_embeddings_db()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
client = get_anthropic_client()
|
client = get_fireworks_client()
|
||||||
except RuntimeError as e:
|
except RuntimeError as e:
|
||||||
print(f"ERROR: {e}")
|
print(f"ERROR: {e}")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue